In [1]:
import glob
import pandas as pd
import re
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from collections import Counter

import missingno as msno
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler, Imputer, FunctionTransformer
from sklearn.feature_selection import f_regression, SelectKBest
from sklearn.feature_selection import VarianceThreshold
from sklearn.linear_model import LinearRegression, SGDRegressor, BayesianRidge, Lasso
from sklearn.base import BaseEstimator, TransformerMixin
from mlxtend.preprocessing import DenseTransformer
from sklearn.decomposition import PCA
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor, RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.tree import DecisionTreeRegressor

from sklearn.naive_bayes import MultinomialNB
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression

pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)

In [2]:
class ItemSelector(BaseEstimator, TransformerMixin):
    def __init__(self, keys):
        self.keys = keys

    def fit(self, x, y=None):
        return self

    def transform(self, data_dict):
        return data_dict[self.keys]

In [3]:
# Load "data-prep.csv"
jobs_df = pd.read_csv("data-prep.csv")

# Drop "Unnamed: 0" column:
jobs_df = jobs_df.drop(columns="Unnamed: 0")

# Extract out rows without 'salary' into another dataframe:
no_salary_df = jobs_df[jobs_df["salary_min"].isnull()].reset_index(drop=True)
jobs_df = jobs_df[jobs_df["salary_min"].notnull()].reset_index(drop=True)

# Perform train-test split:
title_columns = ['title_intern',
 'title_lead',
 'title_principal',
 'title_chief',
 'title_senior',
 'title_junior',
 'title_data',
 'title_ml',
 'title_database',
 'title_ai',
 'title_business',
 'title_software',
 'title_sql',
 'title_financial',
 'title_analyst',
 'title_intelligence',
 'title_engineer',
 'title_developer',
 'title_scientist',
 'title_administrator',
 'title_architect']

non_title_columns = [column for column in jobs_df.columns.values if column not in title_columns]

X_train, X_test, y_train, y_test = train_test_split(jobs_df[non_title_columns], jobs_df[title_columns], test_size=0.3, random_state=42)

non_text_columns = [column for column in non_title_columns if column != "details"]

In [4]:
# OneVsRestClassifier(MultinomialNB(
#         fit_prior=True, class_prior=None))
# OneVsRestClassifier(LinearSVC())
# OneVsRestClassifier(LogisticRegression(solver='sag'))

In [5]:
clf_pipelines = []

vectorizers = [CountVectorizer(min_df = 0.01, max_df = 0.50), TfidfVectorizer(min_df = 0.01, max_df = 0.50)]
classifiers =[
    OneVsRestClassifier(MultinomialNB(
            fit_prior=True, class_prior=None), n_jobs=-1),
    OneVsRestClassifier(LinearSVC(), n_jobs=-1),
    OneVsRestClassifier(LogisticRegression(solver='sag'), n_jobs=-1)
]

for each_vec in vectorizers:
    for each_clf in classifiers:
        clf_pipelines.append(Pipeline([
            ('encode', FeatureUnion([
                    ('tokenize', Pipeline([
                    ('extract_text', ItemSelector(keys='details')),
                    ('vectorize', each_vec),
                    ('to_dense', DenseTransformer())
                ])),
            ('combine', ItemSelector(keys=non_text_columns))
            ])),
            ('impute', Imputer()),
#             ('standardize', StandardScaler()),
            ('reduce_dim', PCA(n_components=15, random_state=42)),
            ('classify', each_clf)
        ]))

for each_vec in vectorizers:
    for each_clf in classifiers:
        clf_pipelines.append(Pipeline([
            ('encode', FeatureUnion([
                    ('tokenize', Pipeline([
                    ('extract_text', ItemSelector(keys='details')),
                    ('vectorize', each_vec),
                    ('to_dense', DenseTransformer())
                ])),
            ('combine', ItemSelector(keys=non_text_columns))
            ])),
            ('impute', Imputer()),
#             ('standardize', StandardScaler()),
            ('classify', each_clf)
        ]))

param_grid={'encode__tokenize__vectorize__max_features': [None, 2500, 5000],
            'encode__tokenize__vectorize__ngram_range': [(1, 1), (1, 2)],
            'encode__tokenize__vectorize__stop_words': [None, 'english']}

len(clf_pipelines)

12

In [6]:
clf_pipelines[0].fit(X_train, np.array(y_train))
clf_pipelines[0].score(X_test, np.array(y_test))

  str(classes[c]))
  str(classes[c]))


JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    169     pkg_name = mod_name.rpartition('.')[0]
    170     main_globals = sys.modules["__main__"].__dict__
    171     if alter_argv:
    172         sys.argv[0] = fname
    173     return _run_code(code, main_globals, None,
--> 174                      "__main__", fname, loader, pkg_name)
        fname = '/Users/jasminetan/anaconda2/lib/python2.7/site-packages/ipykernel_launcher.py'
        loader = <pkgutil.ImpLoader instance>
        pkg_name = ''
    175 
    176 def run_module(mod_name, init_globals=None,
    177                run_name=None, alter_sys=False):
    178     """Execute a module's code without importing it

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/runpy.py in _run_code(code=<code object <module> at 0x10d4f62b0, file "/Use...2.7/site-packages/ipykernel_launcher.py", line 5>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/Users/jasminetan/anaconda2/lib/python2.7/site-packages/ipykernel_launcher.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': '', 'app': <module 'ipykernel.kernelapp' from '/Users/jasmi...python2.7/site-packages/ipykernel/kernelapp.pyc'>, 'sys': <module 'sys' (built-in)>}, init_globals=None, mod_name='__main__', mod_fname='/Users/jasminetan/anaconda2/lib/python2.7/site-packages/ipykernel_launcher.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='')
     67         run_globals.update(init_globals)
     68     run_globals.update(__name__ = mod_name,
     69                        __file__ = mod_fname,
     70                        __loader__ = mod_loader,
     71                        __package__ = pkg_name)
---> 72     exec code in run_globals
        code = <code object <module> at 0x10d4f62b0, file "/Use...2.7/site-packages/ipykernel_launcher.py", line 5>
        run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/Users/jasminetan/anaconda2/lib/python2.7/site-packages/ipykernel_launcher.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': '', 'app': <module 'ipykernel.kernelapp' from '/Users/jasmi...python2.7/site-packages/ipykernel/kernelapp.pyc'>, 'sys': <module 'sys' (built-in)>}
     73     return run_globals
     74 
     75 def _run_module_code(code, init_globals=None,
     76                     mod_name=None, mod_fname=None,

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    473         if self.poller is not None:
    474             self.poller.start()
    475         self.kernel.start()
    476         self.io_loop = ioloop.IOLoop.current()
    477         try:
--> 478             self.io_loop.start()
        self.io_loop.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    479         except KeyboardInterrupt:
    480             pass
    481 
    482 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    883                 self._events.update(event_pairs)
    884                 while self._events:
    885                     fd, events = self._events.popitem()
    886                     try:
    887                         fd_obj, handler_func = self._handlers[fd]
--> 888                         handler_func(fd_obj, events)
        handler_func = <function null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 5
    889                     except (OSError, IOError) as e:
    890                         if errno_from_exception(e) == errno.EPIPE:
    891                             # Happens when the client closes the connection
    892                             pass

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 5), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 5)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=5)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'clf_pipelines[0].fit(X_train, np.array(y_train...clf_pipelines[0].score(X_test, np.array(y_test))', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 4, 27, 0, 53, 19, 132445, tzinfo=tzutc()), u'msg_id': u'b9903937e745506d67fe6520e2f44dc6', u'msg_type': u'execute_request', u'session': u'8046c107cd934ca652bbd91392426c7d', u'username': u'', u'version': u'5.2'}, 'metadata': {}, 'msg_id': u'b9903937e745506d67fe6520e2f44dc6', 'msg_type': u'execute_request', 'parent_header': {}})
    228             self.log.warn("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method IPythonKernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = ['8046c107cd934ca652bbd91392426c7d']
        msg = {'buffers': [], 'content': {u'allow_stdin': True, u'code': u'clf_pipelines[0].fit(X_train, np.array(y_train...clf_pipelines[0].score(X_test, np.array(y_test))', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 4, 27, 0, 53, 19, 132445, tzinfo=tzutc()), u'msg_id': u'b9903937e745506d67fe6520e2f44dc6', u'msg_type': u'execute_request', u'session': u'8046c107cd934ca652bbd91392426c7d', u'username': u'', u'version': u'5.2'}, 'metadata': {}, 'msg_id': u'b9903937e745506d67fe6520e2f44dc6', 'msg_type': u'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['8046c107cd934ca652bbd91392426c7d'], parent={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'clf_pipelines[0].fit(X_train, np.array(y_train...clf_pipelines[0].score(X_test, np.array(y_test))', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 4, 27, 0, 53, 19, 132445, tzinfo=tzutc()), u'msg_id': u'b9903937e745506d67fe6520e2f44dc6', u'msg_type': u'execute_request', u'session': u'8046c107cd934ca652bbd91392426c7d', u'username': u'', u'version': u'5.2'}, 'metadata': {}, 'msg_id': u'b9903937e745506d67fe6520e2f44dc6', 'msg_type': u'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=u'clf_pipelines[0].fit(X_train, np.array(y_train...clf_pipelines[0].score(X_test, np.array(y_test))', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = u'clf_pipelines[0].fit(X_train, np.array(y_train...clf_pipelines[0].score(X_test, np.array(y_test))'
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=(u'clf_pipelines[0].fit(X_train, np.array(y_train...clf_pipelines[0].score(X_test, np.array(y_test))',), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = (u'clf_pipelines[0].fit(X_train, np.array(y_train...clf_pipelines[0].score(X_test, np.array(y_test))',)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=u'clf_pipelines[0].fit(X_train, np.array(y_train...clf_pipelines[0].score(X_test, np.array(y_test))', store_history=True, silent=False, shell_futures=True)
   2713                 self.displayhook.exec_result = result
   2714 
   2715                 # Execute the user code
   2716                 interactivity = "none" if silent else self.ast_node_interactivity
   2717                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2718                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler instance>
   2719                 
   2720                 self.last_execution_succeeded = not has_raised
   2721 
   2722                 # Reset this so later displayed values do not modify the

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Expr object>, <_ast.Expr object>], cell_name='<ipython-input-6-10aba4672960>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<ExecutionResult object at 10f5e54d0, execution_..._before_exec=None error_in_exec=None result=None>)
   2817 
   2818         try:
   2819             for i, node in enumerate(to_run_exec):
   2820                 mod = ast.Module([node])
   2821                 code = compiler(mod, cell_name, "exec")
-> 2822                 if self.run_code(code, result):
        self.run_code = <bound method ZMQInteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x1a152031b0, file "<ipython-input-6-10aba4672960>", line 1>
        result = <ExecutionResult object at 10f5e54d0, execution_..._before_exec=None error_in_exec=None result=None>
   2823                     return True
   2824 
   2825             for i, node in enumerate(to_run_interactive):
   2826                 mod = ast.Interactive([node])

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x1a152031b0, file "<ipython-input-6-10aba4672960>", line 1>, result=<ExecutionResult object at 10f5e54d0, execution_..._before_exec=None error_in_exec=None result=None>)
   2877         outflag = 1  # happens in more places, so it's easier as default
   2878         try:
   2879             try:
   2880                 self.hooks.pre_run_code_hook()
   2881                 #rprint('Running code', repr(code_obj)) # dbg
-> 2882                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x1a152031b0, file "<ipython-input-6-10aba4672960>", line 1>
        self.user_global_ns = {'AdaBoostRegressor': <class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>, 'BaseEstimator': <class 'sklearn.base.BaseEstimator'>, 'BayesianRidge': <class 'sklearn.linear_model.bayes.BayesianRidge'>, 'CountVectorizer': <class 'sklearn.feature_extraction.text.CountVectorizer'>, 'Counter': <class 'collections.Counter'>, 'DecisionTreeRegressor': <class 'sklearn.tree.tree.DecisionTreeRegressor'>, 'DenseTransformer': <class 'mlxtend.preprocessing.dense_transformer.DenseTransformer'>, 'FeatureUnion': <class 'sklearn.pipeline.FeatureUnion'>, 'FunctionTransformer': <class 'sklearn.preprocessing._function_transformer.FunctionTransformer'>, 'GradientBoostingRegressor': <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>, ...}
        self.user_ns = {'AdaBoostRegressor': <class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>, 'BaseEstimator': <class 'sklearn.base.BaseEstimator'>, 'BayesianRidge': <class 'sklearn.linear_model.bayes.BayesianRidge'>, 'CountVectorizer': <class 'sklearn.feature_extraction.text.CountVectorizer'>, 'Counter': <class 'collections.Counter'>, 'DecisionTreeRegressor': <class 'sklearn.tree.tree.DecisionTreeRegressor'>, 'DenseTransformer': <class 'mlxtend.preprocessing.dense_transformer.DenseTransformer'>, 'FeatureUnion': <class 'sklearn.pipeline.FeatureUnion'>, 'FunctionTransformer': <class 'sklearn.preprocessing._function_transformer.FunctionTransformer'>, 'GradientBoostingRegressor': <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>, ...}
   2883             finally:
   2884                 # Reset our crash handler in place
   2885                 sys.excepthook = old_excepthook
   2886         except SystemExit as e:

...........................................................................
/Users/jasminetan/code/jt-dsi-projects/project-four/<ipython-input-6-10aba4672960> in <module>()
----> 1 clf_pipelines[0].fit(X_train, np.array(y_train))
      2 clf_pipelines[0].score(X_test, np.array(y_test))

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/sklearn/pipeline.py in fit(self=Pipeline(memory=None,
     steps=[('encode', Fea...or=None, fit_prior=True),
          n_jobs=-1))]), X=                                                ...35000.00   140000.00  

[762 rows x 1146 columns], y=array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0,..., ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]]), **fit_params={})
    245         self : Pipeline
    246             This estimator
    247         """
    248         Xt, fit_params = self._fit(X, y, **fit_params)
    249         if self._final_estimator is not None:
--> 250             self._final_estimator.fit(Xt, y, **fit_params)
        self._final_estimator.fit = <bound method OneVsRestClassifier.fit of OneVsRe...rior=None, fit_prior=True),
          n_jobs=-1)>
        Xt = array([[-3.60007255e+04,  1.40333753e+04,  2.433...94824286e-01, -3.83042766e-01,  1.12792440e+00]])
        y = array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0,..., ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])
        fit_params = {}
    251         return self
    252 
    253     def fit_transform(self, X, y=None, **fit_params):
    254         """Fit the model and transform with the final estimator

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/sklearn/multiclass.py in fit(self=OneVsRestClassifier(estimator=MultinomialNB(alph...prior=None, fit_prior=True),
          n_jobs=-1), X=array([[-3.60007255e+04,  1.40333753e+04,  2.433...94824286e-01, -3.83042766e-01,  1.12792440e+00]]), y=array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0,..., ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]]))
    210         # of spawning threads.  See joblib issue #112.
    211         self.estimators_ = Parallel(n_jobs=self.n_jobs)(delayed(_fit_binary)(
    212             self.estimator, X, column, classes=[
    213                 "not %s" % self.label_binarizer_.classes_[i],
    214                 self.label_binarizer_.classes_[i]])
--> 215             for i, column in enumerate(columns))
        columns = <generator object <genexpr>>
    216 
    217         return self
    218 
    219     @if_delegate_has_method('estimator')

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object <genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Fri Apr 27 08:53:21 2018
PID: 26482            Python 2.7.14: /Users/jasminetan/anaconda2/bin/python
...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_binary>
        args = (MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True), array([[-3.60007255e+04,  1.40333753e+04,  2.433...94824286e-01, -3.83042766e-01,  1.12792440e+00]]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]))
        kwargs = {'classes': ['not 1', 1]}
        self.items = [(<function _fit_binary>, (MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True), array([[-3.60007255e+04,  1.40333753e+04,  2.433...94824286e-01, -3.83042766e-01,  1.12792440e+00]]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])), {'classes': ['not 1', 1]})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/sklearn/multiclass.py in _fit_binary(estimator=MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True), X=array([[-3.60007255e+04,  1.40333753e+04,  2.433...94824286e-01, -3.83042766e-01,  1.12792440e+00]]), y=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]), classes=['not 1', 1])
     75             warnings.warn("Label %s is present in all training examples." %
     76                           str(classes[c]))
     77         estimator = _ConstantPredictor().fit(X, unique_y)
     78     else:
     79         estimator = clone(estimator)
---> 80         estimator.fit(X, y)
        estimator.fit = <bound method MultinomialNB.fit of MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)>
        X = array([[-3.60007255e+04,  1.40333753e+04,  2.433...94824286e-01, -3.83042766e-01,  1.12792440e+00]])
        y = array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])
     81     return estimator
     82 
     83 
     84 def _partial_fit_binary(estimator, X, y):

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/sklearn/naive_bayes.py in fit(self=MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True), X=array([[-3.60007255e+04,  1.40333753e+04,  2.433...94824286e-01, -3.83042766e-01,  1.12792440e+00]]), y=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]), sample_weight=None)
    599         # and feature log probas
    600         n_effective_classes = Y.shape[1]
    601         self.class_count_ = np.zeros(n_effective_classes, dtype=np.float64)
    602         self.feature_count_ = np.zeros((n_effective_classes, n_features),
    603                                        dtype=np.float64)
--> 604         self._count(X, Y)
        self._count = <bound method MultinomialNB._count of MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)>
        X = array([[-3.60007255e+04,  1.40333753e+04,  2.433...94824286e-01, -3.83042766e-01,  1.12792440e+00]])
        Y = array([[1., 0.],
       [1., 0.],
       [1., 0....     [1., 0.],
       [0., 1.],
       [1., 0.]])
    605         alpha = self._check_alpha()
    606         self._update_feature_log_prob(alpha)
    607         self._update_class_log_prior(class_prior=class_prior)
    608         return self

...........................................................................
/Users/jasminetan/anaconda2/lib/python2.7/site-packages/sklearn/naive_bayes.py in _count(self=MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True), X=array([[-3.60007255e+04,  1.40333753e+04,  2.433...94824286e-01, -3.83042766e-01,  1.12792440e+00]]), Y=array([[1., 0.],
       [1., 0.],
       [1., 0....     [1., 0.],
       [0., 1.],
       [1., 0.]]))
    703         self.class_prior = class_prior
    704 
    705     def _count(self, X, Y):
    706         """Count and smooth feature occurrences."""
    707         if np.any((X.data if issparse(X) else X) < 0):
--> 708             raise ValueError("Input X must be non-negative")
    709         self.feature_count_ += safe_sparse_dot(Y.T, X)
    710         self.class_count_ += Y.sum(axis=0)
    711 
    712     def _update_feature_log_prob(self, alpha):

ValueError: Input X must be non-negative
___________________________________________________________________________