# Microsoft Recently developed a gradient boosting machine which will be tested on the 19h_44d dataset.

The wrapper for python can be found at:

https://github.com/ArdalanM/pyLightGBM/blob/master/examples/binary_classification.py

Note that path_to_exec needs to be specified and is the file path to the lightgbm installer.

A quickstart guide with paramter list can be found at: 

https://github.com/ArdalanM/pyLightGBM

Note that referencing path_to_exec has different formatting for different OS. 

A reference for how the parameters compare to XGBoost and small tutorial range of values see:
https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.md

In [2]:
from pylightgbm.models import GBMClassifier
import numpy as np
import pandas as pd
from sklearn import datasets, metrics, model_selection
from sklearn import metrics
from sklearn.model_selection import cross_val_predict

path_to_exec = "/home/tom/IdeaProjects/Astronomy Supervised Project/LightGBM/build/build/LightGBM/build/LightGBM/lightgbm"


df = pd.read_csv("TESSfield_19h_44d_combinedfeatures_try2.csv", index_col=0)
X = df.drop(['Ids', 'CatalogY', 'ManuleY', 'CombinedY', 'Catalog_Period',
             'Depth', 'Catalog_Epoch', 'SNR'], axis=1)

y = df['CombinedY']

def modelfit(alg, X, y, cv_folds=4):
    # StratifiedKFold automatically used by cross_val_predict on binary classification
    # bear in mind that this does not use trapezfoid rule
    # y_pred calculates the probabilities that each value is 1 or 0 using stratified cross validation
    # pr_auc calculates the area under a precision recall curve
    y_pred = cross_val_predict(alg, X, y, cv=cv_folds, 
                               method='predict_proba')[:,1]
    pr_auc = metrics.average_precision_score(y, y_pred)
    auc_roc = metrics.roc_auc_score(y, y_pred)
    print 'The pr and roc scores are: '
    return pr_auc, auc_roc


params = {'exec_path': path_to_exec, 'verbose': False}


clf = GBMClassifier(application='binary', **params)


modelfit(clf, X, y)

The pr and roc scores are: 


(0.76458332695049502, 0.90470449476675463)

We see that the default scores are quite high practically on par with the XGBoost eqivalent and it runs more quickly, only taking ~20 seconds on my laptop. 

First we can try using the optimal XGBoost values for the same dataset.  

In [3]:
params = {'exec_path': path_to_exec, 'verbose': False,
          'num_iterations':100, 'learning_rate':.1, 'num_leaves':1024,
          'min_data_in_leaf':1}

# binary tells GBM that its binary classification
clf = GBMClassifier(application='binary', **params)

modelfit(clf, X, y)

The pr and roc scores are: 


(0.76152641708855184, 0.89857018484999152)

Some notes about the parameters: 

Feature_fraction:
Selects part of features on each iteration, so if it is .80 it will select 80 percent of the features before training each tree. 

In [4]:
from sklearn.grid_search import GridSearchCV

param_test1 = {'num_leaves':[1, 10, 100, 1000], 'min_data_in_leaf':
               [1, 10, 100, 1000]}


gsearch = GridSearchCV(estimator = GBMClassifier(application='binary',
                                                 exec_path=path_to_exec,
                                                 verbose= False, 
                                                 num_threads=4), 
                       param_grid = param_test1, 
                       scoring='roc_auc',
                       n_jobs=4,
                       iid=False,
                       cv=4)

gsearch.fit(X, y)
gsearch1.grid_scores_, gsearch1.best_params_, gsearch1.best_score_

JoblibCalledProcessError: JoblibCalledProcessError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/runpy.py in _run_module_as_main(mod_name='ipykernel.__main__', alter_argv=1)
    169     pkg_name = mod_name.rpartition('.')[0]
    170     main_globals = sys.modules["__main__"].__dict__
    171     if alter_argv:
    172         sys.argv[0] = fname
    173     return _run_code(code, main_globals, None,
--> 174                      "__main__", fname, loader, pkg_name)
        fname = '/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/__main__.py'
        loader = <pkgutil.ImpLoader instance>
        pkg_name = 'ipykernel'
    175 
    176 def run_module(mod_name, init_globals=None,
    177                run_name=None, alter_sys=False):
    178     """Execute a module's code without importing it

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/runpy.py in _run_code(code=<code object <module> at 0x7f31c9bc49b0, file "/...2.7/site-packages/ipykernel/__main__.py", line 1>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/home/tom/an...python2.7/site-packages/ipykernel/kernelapp.pyc'>}, init_globals=None, mod_name='__main__', mod_fname='/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/__main__.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='ipykernel')
     67         run_globals.update(init_globals)
     68     run_globals.update(__name__ = mod_name,
     69                        __file__ = mod_fname,
     70                        __loader__ = mod_loader,
     71                        __package__ = pkg_name)
---> 72     exec code in run_globals
        code = <code object <module> at 0x7f31c9bc49b0, file "/...2.7/site-packages/ipykernel/__main__.py", line 1>
        run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/home/tom/an...python2.7/site-packages/ipykernel/kernelapp.pyc'>}
     73     return run_globals
     74 
     75 def _run_module_code(code, init_globals=None,
     76                     mod_name=None, mod_fname=None,

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/__main__.py in <module>()
      1 
      2 
----> 3 
      4 if __name__ == '__main__':
      5     from ipykernel import kernelapp as app
      6     app.launch_new_instance()
      7 
      8 
      9 
     10 

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    648 
    649         If a global instance already exists, this reinitializes and starts it
    650         """
    651         app = cls.instance(**kwargs)
    652         app.initialize(argv)
--> 653         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    654 
    655 #-----------------------------------------------------------------------------
    656 # utility functions, for convenience
    657 #-----------------------------------------------------------------------------

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    469             return self.subapp.start()
    470         if self.poller is not None:
    471             self.poller.start()
    472         self.kernel.start()
    473         try:
--> 474             ioloop.IOLoop.instance().start()
    475         except KeyboardInterrupt:
    476             pass
    477 
    478 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    157             PollIOLoop.configure(ZMQIOLoop)
    158         return PollIOLoop.current(*args, **kwargs)
    159     
    160     def start(self):
    161         try:
--> 162             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    163         except ZMQError as e:
    164             if e.errno == ETERM:
    165                 # quietly return on ETERM
    166                 pass

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    882                 self._events.update(event_pairs)
    883                 while self._events:
    884                     fd, events = self._events.popitem()
    885                     try:
    886                         fd_obj, handler_func = self._handlers[fd]
--> 887                         handler_func(fd_obj, events)
        handler_func = <function null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 5
    888                     except (OSError, IOError) as e:
    889                         if errno_from_exception(e) == errno.EPIPE:
    890                             # Happens when the client closes the connection
    891                             pass

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 5), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 5)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=5)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    271         if self.control_stream:
    272             self.control_stream.on_recv(self.dispatch_control, copy=False)
    273 
    274         def make_dispatcher(stream):
    275             def dispatcher(msg):
--> 276                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    277             return dispatcher
    278 
    279         for s in self.shell_streams:
    280             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {u'allow_stdin': True, u'code': u"from sklearn.grid_search import GridSearchCV\n...es_, gsearch1.best_params_, gsearch1.best_score_", u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-12-17T09:32:46.915586', u'msg_id': u'FC12AA4356F241C885DC947DEC01EF79', u'msg_type': u'execute_request', u'session': u'EC76079102D2402389656099721F7FDC', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'FC12AA4356F241C885DC947DEC01EF79', 'msg_type': u'execute_request', 'parent_header': {}})
    223             self.log.error("UNKNOWN MESSAGE TYPE: %r", msg_type)
    224         else:
    225             self.log.debug("%s: %s", msg_type, msg)
    226             self.pre_handler_hook()
    227             try:
--> 228                 handler(stream, idents, msg)
        handler = <bound method IPythonKernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = ['EC76079102D2402389656099721F7FDC']
        msg = {'buffers': [], 'content': {u'allow_stdin': True, u'code': u"from sklearn.grid_search import GridSearchCV\n...es_, gsearch1.best_params_, gsearch1.best_score_", u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-12-17T09:32:46.915586', u'msg_id': u'FC12AA4356F241C885DC947DEC01EF79', u'msg_type': u'execute_request', u'session': u'EC76079102D2402389656099721F7FDC', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'FC12AA4356F241C885DC947DEC01EF79', 'msg_type': u'execute_request', 'parent_header': {}}
    229             except Exception:
    230                 self.log.error("Exception in message handler:", exc_info=True)
    231             finally:
    232                 self.post_handler_hook()

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['EC76079102D2402389656099721F7FDC'], parent={'buffers': [], 'content': {u'allow_stdin': True, u'code': u"from sklearn.grid_search import GridSearchCV\n...es_, gsearch1.best_params_, gsearch1.best_score_", u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-12-17T09:32:46.915586', u'msg_id': u'FC12AA4356F241C885DC947DEC01EF79', u'msg_type': u'execute_request', u'session': u'EC76079102D2402389656099721F7FDC', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'FC12AA4356F241C885DC947DEC01EF79', 'msg_type': u'execute_request', 'parent_header': {}})
    385         if not silent:
    386             self.execution_count += 1
    387             self._publish_execute_input(code, parent, self.execution_count)
    388 
    389         reply_content = self.do_execute(code, silent, store_history,
--> 390                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    391 
    392         # Flush output before sending the reply.
    393         sys.stdout.flush()
    394         sys.stderr.flush()

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=u"from sklearn.grid_search import GridSearchCV\n...es_, gsearch1.best_params_, gsearch1.best_score_", silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    191 
    192         self._forward_input(allow_stdin)
    193 
    194         reply_content = {}
    195         try:
--> 196             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = u"from sklearn.grid_search import GridSearchCV\n...es_, gsearch1.best_params_, gsearch1.best_score_"
        store_history = True
        silent = False
    197         finally:
    198             self._restore_input()
    199 
    200         if res.error_before_exec is not None:

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=(u"from sklearn.grid_search import GridSearchCV\n...es_, gsearch1.best_params_, gsearch1.best_score_",), **kwargs={'silent': False, 'store_history': True})
    496             )
    497         self.payload_manager.write_payload(payload)
    498 
    499     def run_cell(self, *args, **kwargs):
    500         self._last_traceback = None
--> 501         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = (u"from sklearn.grid_search import GridSearchCV\n...es_, gsearch1.best_params_, gsearch1.best_score_",)
        kwargs = {'silent': False, 'store_history': True}
    502 
    503     def _showtraceback(self, etype, evalue, stb):
    504         # try to preserve ordering of tracebacks and print statements
    505         sys.stdout.flush()

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=u"from sklearn.grid_search import GridSearchCV\n...es_, gsearch1.best_params_, gsearch1.best_score_", store_history=True, silent=False, shell_futures=True)
   2712                 self.displayhook.exec_result = result
   2713 
   2714                 # Execute the user code
   2715                 interactivity = "none" if silent else self.ast_node_interactivity
   2716                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2717                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler instance>
   2718                 
   2719                 self.last_execution_succeeded = not has_raised
   2720 
   2721                 # Reset this so later displayed values do not modify the

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.ImportFrom object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>, <_ast.Expr object>], cell_name='<ipython-input-4-0a5580a0418d>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<ExecutionResult object at 7f31bc17dd10, executi..._before_exec=None error_in_exec=None result=None>)
   2816 
   2817         try:
   2818             for i, node in enumerate(to_run_exec):
   2819                 mod = ast.Module([node])
   2820                 code = compiler(mod, cell_name, "exec")
-> 2821                 if self.run_code(code, result):
        self.run_code = <bound method ZMQInteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x7f318e0c1bb0, file "<ipython-input-4-0a5580a0418d>", line 17>
        result = <ExecutionResult object at 7f31bc17dd10, executi..._before_exec=None error_in_exec=None result=None>
   2822                     return True
   2823 
   2824             for i, node in enumerate(to_run_interactive):
   2825                 mod = ast.Interactive([node])

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7f318e0c1bb0, file "<ipython-input-4-0a5580a0418d>", line 17>, result=<ExecutionResult object at 7f31bc17dd10, executi..._before_exec=None error_in_exec=None result=None>)
   2876         outflag = 1  # happens in more places, so it's easier as default
   2877         try:
   2878             try:
   2879                 self.hooks.pre_run_code_hook()
   2880                 #rprint('Running code', repr(code_obj)) # dbg
-> 2881                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x7f318e0c1bb0, file "<ipython-input-4-0a5580a0418d>", line 17>
        self.user_global_ns = {'GBMClassifier': <class 'pylightgbm.models.GBMClassifier'>, 'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'In': ['', u"from sklearn.grid_search import GridSearchCV\n...es_, gsearch1.best_params_, gsearch1.best_score_", u'from pylightgbm.models import GBMClassifier\ni...n=\'binary\', **params)\n\n\nmodelfit(clf, X, y)', u"params = {'exec_path': path_to_exec, 'verbose'...ation='binary', **params)\n\nmodelfit(clf, X, y)", u"from sklearn.grid_search import GridSearchCV\n...es_, gsearch1.best_params_, gsearch1.best_score_"], 'Out': {2: (0.76458332695049502, 0.90470449476675463), 3: (0.76152641708855184, 0.89857018484999152)}, 'X':                   BLS_Tc_1_0  BLS_SN_1_0  BLS_SR...10.156305  13.841632  

[31343 rows x 22 columns], '_': (0.76152641708855184, 0.89857018484999152), '_2': (0.76458332695049502, 0.90470449476675463), '_3': (0.76152641708855184, 0.89857018484999152), '__': (0.76458332695049502, 0.90470449476675463), '___': '', ...}
        self.user_ns = {'GBMClassifier': <class 'pylightgbm.models.GBMClassifier'>, 'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'In': ['', u"from sklearn.grid_search import GridSearchCV\n...es_, gsearch1.best_params_, gsearch1.best_score_", u'from pylightgbm.models import GBMClassifier\ni...n=\'binary\', **params)\n\n\nmodelfit(clf, X, y)', u"params = {'exec_path': path_to_exec, 'verbose'...ation='binary', **params)\n\nmodelfit(clf, X, y)", u"from sklearn.grid_search import GridSearchCV\n...es_, gsearch1.best_params_, gsearch1.best_score_"], 'Out': {2: (0.76458332695049502, 0.90470449476675463), 3: (0.76152641708855184, 0.89857018484999152)}, 'X':                   BLS_Tc_1_0  BLS_SN_1_0  BLS_SR...10.156305  13.841632  

[31343 rows x 22 columns], '_': (0.76152641708855184, 0.89857018484999152), '_2': (0.76458332695049502, 0.90470449476675463), '_3': (0.76152641708855184, 0.89857018484999152), '__': (0.76458332695049502, 0.90470449476675463), '___': '', ...}
   2882             finally:
   2883                 # Reset our crash handler in place
   2884                 sys.excepthook = old_excepthook
   2885         except SystemExit as e:

...........................................................................
/home/tom/IdeaProjects/Astronomy Supervised Project/<ipython-input-4-0a5580a0418d> in <module>()
     12                        scoring='roc_auc',
     13                        n_jobs=4,
     14                        iid=False,
     15                        cv=4)
     16 
---> 17 gsearch.fit(X, y)
     18 gsearch1.grid_scores_, gsearch1.best_params_, gsearch1.best_score_
     19 
     20 
     21 

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/sklearn/grid_search.py in fit(self=GridSearchCV(cv=4, error_score='raise',
       e..._jobs', refit=True, scoring='roc_auc', verbose=0), X=                  BLS_Tc_1_0  BLS_SN_1_0  BLS_SR...10.156305  13.841632  

[31343 rows x 22 columns], y=BLS_Period_1_0
1.415497     0
12.771876    0
3.2...   0
1.874365     0
Name: CombinedY, dtype: int64)
    808         y : array-like, shape = [n_samples] or [n_samples, n_output], optional
    809             Target relative to X for classification or regression;
    810             None for unsupervised learning.
    811 
    812         """
--> 813         return self._fit(X, y, ParameterGrid(self.param_grid))
        self._fit = <bound method GridSearchCV._fit of GridSearchCV(...jobs', refit=True, scoring='roc_auc', verbose=0)>
        X =                   BLS_Tc_1_0  BLS_SN_1_0  BLS_SR...10.156305  13.841632  

[31343 rows x 22 columns]
        y = BLS_Period_1_0
1.415497     0
12.771876    0
3.2...   0
1.874365     0
Name: CombinedY, dtype: int64
        self.param_grid = {'min_data_in_leaf': [1, 10, 100, 1000], 'num_leaves': [1, 10, 100, 1000]}
    814 
    815 
    816 class RandomizedSearchCV(BaseSearchCV):
    817     """Randomized search on hyper parameters.

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/sklearn/grid_search.py in _fit(self=GridSearchCV(cv=4, error_score='raise',
       e..._jobs', refit=True, scoring='roc_auc', verbose=0), X=                  BLS_Tc_1_0  BLS_SN_1_0  BLS_SR...10.156305  13.841632  

[31343 rows x 22 columns], y=BLS_Period_1_0
1.415497     0
12.771876    0
3.2...   0
1.874365     0
Name: CombinedY, dtype: int64, parameter_iterable=<sklearn.grid_search.ParameterGrid object>)
    556         )(
    557             delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
    558                                     train, test, self.verbose, parameters,
    559                                     self.fit_params, return_parameters=True,
    560                                     error_score=self.error_score)
--> 561                 for parameters in parameter_iterable
        parameters = undefined
        parameter_iterable = <sklearn.grid_search.ParameterGrid object>
    562                 for train, test in cv)
    563 
    564         # Out is a list of triplet: score, estimator, n_test_samples
    565         n_fits = len(out)

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=4), iterable=<generator object <genexpr>>)
    763             if pre_dispatch == "all" or n_jobs == 1:
    764                 # The iterable was consumed all at once by the above for loop.
    765                 # No need to wait for async callbacks to trigger to
    766                 # consumption.
    767                 self._iterating = False
--> 768             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=4)>
    769             # Make sure that we get a last message telling us we are done
    770             elapsed_time = time.time() - self._start_time
    771             self._print('Done %3i out of %3i | elapsed: %s finished',
    772                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
CalledProcessError                                 Sat Dec 17 09:32:51 2016
PID: 3832           Python 2.7.12: /home/tom/anaconda3/envs/py27/bin/python
...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (GBMClassifier(application='binary', bagging_frac..., tree_learner='serial', valid='', verbose=False),                   BLS_Tc_1_0  BLS_SN_1_0  BLS_SR...10.156305  13.841632  

[31343 rows x 22 columns], BLS_Period_1_0
1.415497     0
12.771876    0
3.2...   0
1.874365     0
Name: CombinedY, dtype: int64, make_scorer(roc_auc_score, needs_threshold=True), array([ 6451,  6514,  6516, ..., 31340, 31341, 31342]), array([   0,    1,    2, ..., 7880, 7881, 7882]), 0, {'min_data_in_leaf': 1, 'num_leaves': 1}, {})
        kwargs = {'error_score': 'raise', 'return_parameters': True}
        self.items = [(<function _fit_and_score>, (GBMClassifier(application='binary', bagging_frac..., tree_learner='serial', valid='', verbose=False),                   BLS_Tc_1_0  BLS_SN_1_0  BLS_SR...10.156305  13.841632  

[31343 rows x 22 columns], BLS_Period_1_0
1.415497     0
12.771876    0
3.2...   0
1.874365     0
Name: CombinedY, dtype: int64, make_scorer(roc_auc_score, needs_threshold=True), array([ 6451,  6514,  6516, ..., 31340, 31341, 31342]), array([   0,    1,    2, ..., 7880, 7881, 7882]), 0, {'min_data_in_leaf': 1, 'num_leaves': 1}, {}), {'error_score': 'raise', 'return_parameters': True})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/sklearn/cross_validation.py in _fit_and_score(estimator=GBMClassifier(application='binary', bagging_frac..., tree_learner='serial', valid='', verbose=False), X=                  BLS_Tc_1_0  BLS_SN_1_0  BLS_SR...10.156305  13.841632  

[31343 rows x 22 columns], y=BLS_Period_1_0
1.415497     0
12.771876    0
3.2...   0
1.874365     0
Name: CombinedY, dtype: int64, scorer=make_scorer(roc_auc_score, needs_threshold=True), train=array([ 6451,  6514,  6516, ..., 31340, 31341, 31342]), test=array([   0,    1,    2, ..., 7880, 7881, 7882]), verbose=0, parameters={'min_data_in_leaf': 1, 'num_leaves': 1}, fit_params={}, return_train_score=False, return_parameters=True, error_score='raise')
   1608 
   1609     try:
   1610         if y_train is None:
   1611             estimator.fit(X_train, **fit_params)
   1612         else:
-> 1613             estimator.fit(X_train, y_train, **fit_params)
        estimator.fit = <bound method GBMClassifier.fit of GBMClassifier... tree_learner='serial', valid='', verbose=False)>
        X_train =                   BLS_Tc_1_0  BLS_SN_1_0  BLS_SR...10.156305  13.841632  

[23506 rows x 22 columns]
        y_train = BLS_Period_1_0
3.671844     1
5.264681     1
5.2...   0
1.874365     0
Name: CombinedY, dtype: int64
        fit_params = {}
   1614 
   1615     except Exception as e:
   1616         if error_score == 'raise':
   1617             raise

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/site-packages/pylightgbm/models.py in fit(self=GBMClassifier(application='binary', bagging_frac..., tree_learner='serial', valid='', verbose=False), X=                  BLS_Tc_1_0  BLS_SN_1_0  BLS_SR...10.156305  13.841632  

[23506 rows x 22 columns], y=BLS_Period_1_0
3.671844     1
5.264681     1
5.2...   0
1.874365     0
Name: CombinedY, dtype: int64, test_data=None)
     75             conf_filepath = os.path.join(tmp_dir, "train.conf")
     76             with open(conf_filepath, 'w') as f:
     77                 f.writelines(calls)
     78 
     79             process = subprocess.check_output([self.exec_path, "config={}".format(conf_filepath)],
---> 80                                               universal_newlines=True)
     81 
     82         else:
     83             process = subprocess.check_output([self.exec_path, "config={}".format(self.config)],
     84                                               universal_newlines=True)

...........................................................................
/home/tom/anaconda3/envs/py27/lib/python2.7/subprocess.py in check_output(*popenargs=(['/home/tom/IdeaProjects/Astronomy Supervised Proj...tGBM/build/build/LightGBM/build/LightGBM/lightgbm', 'config=/tmp/tmpYC3v42/train.conf'],), **kwargs={'universal_newlines': True})
    569     retcode = process.poll()
    570     if retcode:
    571         cmd = kwargs.get("args")
    572         if cmd is None:
    573             cmd = popenargs[0]
--> 574         raise CalledProcessError(retcode, cmd, output=output)
        retcode = 1
        cmd = ['/home/tom/IdeaProjects/Astronomy Supervised Proj...tGBM/build/build/LightGBM/build/LightGBM/lightgbm', 'config=/tmp/tmpYC3v42/train.conf']
        output = '[LightGBM] [Error] Unknown parameter in config file: valid=\n'
    575     return output
    576 
    577 
    578 def list2cmdline(seq):

CalledProcessError: Command '['/home/tom/IdeaProjects/Astronomy Supervised Project/LightGBM/build/build/LightGBM/build/LightGBM/lightgbm', 'config=/tmp/tmpYC3v42/train.conf']' returned non-zero exit status 1
___________________________________________________________________________