In [37]:
#!/usr/bin/python

import sys
import pickle
import pandas as pd
import numpy
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.model_selection import GridSearchCV 
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.cross_validation import train_test_split
sys.path.append("../tools/")
from feature_format import featureFormat, targetFeatureSplit
from tester import dump_classifier_and_data
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
import matplotlib.pyplot

In [38]:
### Task 1: Select what features you'll use.
### features_list is a list of strings, each of which is a feature name.
### The first feature must be "poi".
# features_list is a list of my selected features
# all_features is a list for exploration
#features_list = ['poi', 'bonus', 'expenses', 'bon_plus_expenses', 'bon_sal_ratio', \
#                'to_msg_ratio', 'from_msg_ratio']

features_list = ['poi', 'bon_plus_expenses']
all_features = ['poi', 'salary', 'bonus', 'long_term_incentive', \
                'deferred_income', 'expenses', 'total_payments', \
                'exercised_stock_options', 'restricted_stock', 'other', 'to_messages', \
                'email_address', 'from_poi_to_this_person', 'from_messages', \
                'from_this_person_to_poi', 'shared_receipt_with_poi', 'to_msg_ratio', \
                'from_msg_ratio', 'bon_plus_expenses', 'bon_sal_ratio'] 


### Load the dictionary containing the dataset
with open("final_project_dataset.pkl", "r") as data_file:
    data_dict = pickle.load(data_file)

df = pd.DataFrame.from_records(list(data_dict.values()))
employees = pd.Series(list(data_dict.keys()))

# set the index of df to be the employees series:
df.set_index(employees, inplace=True)

In [39]:
# Find how many missing values are in all features
for feature in all_features:
    try:
        print "Number of missing values in " + str(feature) + ": " + str(df[feature].value_counts(dropna=False)[0])
    except:
        print "Created feature: ", str(feature)

Number of missing values in poi: 128
Number of missing values in salary: 51
Number of missing values in bonus: 64
Number of missing values in long_term_incentive: 80
Number of missing values in deferred_income: 97
Number of missing values in expenses: 51
Number of missing values in total_payments: 21
Number of missing values in exercised_stock_options: 44
Number of missing values in restricted_stock: 36
Number of missing values in other: 53
Number of missing values in to_messages: 60
Number of missing values in email_address: 35
Number of missing values in from_poi_to_this_person: 12
Number of missing values in from_messages: 60
Number of missing values in from_this_person_to_poi: 20
Number of missing values in shared_receipt_with_poi: 60
Created feature:  to_msg_ratio
Created feature:  from_msg_ratio
Created feature:  bon_plus_expenses
Created feature:  bon_sal_ratio


In [40]:
# Create another working dataframe to make new features 
df_new = df.apply(lambda x: pd.to_numeric(x, errors='coerce')).copy()

In [41]:
# from_msg_ratio is ratio messages received from poi to total messages received
df_new['to_msg_ratio'] = df_new.from_this_person_to_poi.divide(df_new.to_messages, axis = 'index')

# create to_msg_ratio by dividing from_this_person_to_poi from to_messages
df_new['from_msg_ratio'] = df_new.from_poi_to_this_person.divide(df_new.from_messages, axis = 'index')

# create a new feature by adding expenses and bonus together
df_new['bon_plus_expenses'] = df_new['bonus'].add(df_new['expenses'], axis = 'index')
# new feature of bonus to salary ratio

df_new['bon_sal_ratio'] = df_new['bonus'].divide(df_new['salary'], axis = 'index')
# new feature of bonus to expenses ratio

In [42]:
# Fill NaN with 0 where operations created NaN in some rows
df_new.fillna(0, inplace = True)

In [43]:
# after you create features, the column names will be your new features
# create a list of column names:
new_features_list = df_new.columns.values
new_features_list

array(['bonus', 'deferral_payments', 'deferred_income', 'director_fees',
       'email_address', 'exercised_stock_options', 'expenses',
       'from_messages', 'from_poi_to_this_person',
       'from_this_person_to_poi', 'loan_advances', 'long_term_incentive',
       'other', 'poi', 'restricted_stock', 'restricted_stock_deferred',
       'salary', 'shared_receipt_with_poi', 'to_messages',
       'total_payments', 'total_stock_value', 'to_msg_ratio',
       'from_msg_ratio', 'bon_plus_expenses', 'bon_sal_ratio'], dtype=object)

In [44]:
### Task 2: Remove outliers

# From the mini project, we have to remove the one outlier called "TOTAL" as 
# a spreadsheet quirk
df_new.drop(['TOTAL'], inplace=True)

In [45]:
# Once outliers are removed, data values should be scaled
# Email ratios definitely don't match bonus and expenses scales
df_new_scaled = (df_new- df_new.min()) / (df_new.max() - df_new.min())
# Some of these may have created NaNs in the dataset
# Fill the NaN with 0 again
df_new_scaled.fillna(0, inplace = True)

In [46]:
# create a dictionary from the dataframe
df_dict = df_new_scaled.to_dict('index')

In [47]:
### Task 3: Create new feature(s)
### Store to my_dataset for easy export below.
my_dataset = df_dict

In [48]:
### Extract features and labels from dataset for local testing
# Created one function for exploration then another for use after feature selection
exploration_data = featureFormat(my_dataset, all_features, sort_keys = True)
labels_exploration, features_exploration = targetFeatureSplit(exploration_data)

data = featureFormat(my_dataset, features_list, sort_keys = True)
labels, features = targetFeatureSplit(data)

In [49]:
### Task 4: Try a varity of classifiers
### Please name your classifier clf for easy export below.
### Note that if you want to do PCA or other multi-stage operations,
### you'll need to use Pipelines. For more info:
### http://scikit-learn.org/stable/modules/pipeline.html

# Provided to give you a starting point. Try a variety of classifiers.

In [50]:
# First one tried is RandomForestClassifier
rfc_exploration = RandomForestClassifier()
rfc_exploration = rfc_exploration.fit(features_exploration, labels_exploration)

In [51]:
# Also trying a decision tree classifier because tree classifiers make sense here
dc_exploration = DecisionTreeClassifier()
dc_exploration= dc_exploration.fit(features_exploration, labels_exploration)

In [52]:
# This function appends the feature and according importance value from tree
# classifier to a list to view more neatly
rfc_impt = []
dc_impt = []

def input_impt(impt_list, features_list, impts):
    for i in range(len(impts)):
        impt_list.append( (features_list[i], impts[i]) )
    
    impt_list.sort(key = lambda tup: tup[1], reverse = True)
    
    return impt_list

In [53]:
# Call previous function to append and sort feature importances 
input_impt(rfc_impt, all_features[1:], rfc_exploration.feature_importances_)
input_impt(dc_impt, all_features[1:], dc_exploration.feature_importances_)

[('expenses', 0.22970867831003985),
 ('bon_plus_expenses', 0.20789151356080485),
 ('exercised_stock_options', 0.16860982247565254),
 ('restricted_stock', 0.12049154306372169),
 ('other', 0.10873640794900638),
 ('from_messages', 0.087128532010421797),
 ('to_msg_ratio', 0.077433502630353007),
 ('salary', 0.0),
 ('bonus', 0.0),
 ('long_term_incentive', 0.0),
 ('deferred_income', 0.0),
 ('total_payments', 0.0),
 ('to_messages', 0.0),
 ('email_address', 0.0),
 ('from_poi_to_this_person', 0.0),
 ('from_this_person_to_poi', 0.0),
 ('shared_receipt_with_poi', 0.0),
 ('from_msg_ratio', 0.0),
 ('bon_sal_ratio', 0.0)]

In [54]:
print "RandomForestClassifier importances values: "
for item in rfc_impt:
    print item[0] + " : " + str(item[1])

RandomForestClassifier importances values: 
exercised_stock_options : 0.143115401124
bon_plus_expenses : 0.115423560641
expenses : 0.097794645044
bonus : 0.0892792109902
from_msg_ratio : 0.0785316061867
total_payments : 0.0644464000558
long_term_incentive : 0.0600373289676
other : 0.054380752044
shared_receipt_with_poi : 0.0509946008215
restricted_stock : 0.0388023976594
salary : 0.038383786569
to_messages : 0.0362348770677
bon_sal_ratio : 0.0299859519335
to_msg_ratio : 0.0276076202916
deferred_income : 0.0224873816999
from_this_person_to_poi : 0.0214087517921
from_messages : 0.0162038168483
from_poi_to_this_person : 0.0148819102634
email_address : 0.0


In [55]:
print "DecisionTreeClassifier importances values: "
for item in dc_impt:
    print item[0] + " : " + str(item[1])

DecisionTreeClassifier importances values: 
expenses : 0.22970867831
bon_plus_expenses : 0.207891513561
exercised_stock_options : 0.168609822476
restricted_stock : 0.120491543064
other : 0.108736407949
from_messages : 0.0871285320104
to_msg_ratio : 0.0774335026304
salary : 0.0
bonus : 0.0
long_term_incentive : 0.0
deferred_income : 0.0
total_payments : 0.0
to_messages : 0.0
email_address : 0.0
from_poi_to_this_person : 0.0
from_this_person_to_poi : 0.0
shared_receipt_with_poi : 0.0
from_msg_ratio : 0.0
bon_sal_ratio : 0.0


In [56]:
# Assign to new classifiers after choosing features

rfc = rfc_exploration.fit(features, labels)
dc = dc_exploration.fit(features, labels)

In [57]:
### Task 5: Tune your classifier to achieve better than .3 precision and recall
### using our testing script. Check the tester.py script in the final project
### folder for details on the evaluation method, especially the test_classifier
### function. Because of the small size of the dataset, the script uses
### stratified shuffle split cross validation. For more info:
### http://scikit-learn.org/stable/modules/generated/sklearn.cross_validation.StratifiedShuffleSplit.html

# Example starting point. Try investigating other evaluation techniques!
features_train, features_test, labels_train, labels_test = \
    train_test_split(features, labels, test_size=0.3, random_state=42, 
                    shuffle = True, stratify = )

In [58]:
# straified cv for parameters, 100 fold, and shuffled
best_cv = StratifiedShuffleSplit(n_splits = 100, random_state=42) 

In [59]:
# random_state is to bring consistency to results
# results to best_params_ were inconsistent before adding random_state
# If you uncomment to run these lines of code, it may take a while.
# Added start and end times to see how long this all takes because
# this exhaustive method has been taking forever. 
# Both CV settings are set to optimize for f1 to get better precision and recall
# It took me 78 minutes to run rfc and about 3 to run decisiontreeclassifier

#start_gridcv_rfc = time.time()
#rfc_param_grid = {'n_estimators': [1,2, 3, 10, 100], 
#                 'min_samples_split': [2, 3, 5],
#                 'random_state': [2],
#                 'max_features': [1, 2, 3],
#                 'max_depth' : [2, 3, 5, 10, 50],
#                 'min_samples_leaf': [1, 2, 3, 10]
#                 }

#grid_cv_rfc = GridSearchCV(estimator = rfc, param_grid = rfc_param_grid, cv = best_cv,
#                          n_jobs = 5, scoring = 'f1')
#grid_cv_rfc.fit(features, labels)
#end_gridcv_rfc = time.time()
#print "Minutes elapsed: " + str((float(end_gridcv_rfc - start_gridcv_rfc) / 60))

In [60]:
# gridsearchcv for decisiontreeclassifier

start_gridcv_dc = time.time()
dc_param_grid = {'min_samples_split' : [2, 3, 4, 5, 10, 50],
                 'max_features' : [1, 2, 3, 4, 'auto', 'sqrt', 'log2'],
                 'min_samples_leaf': [1, 2, 3, 10, 20],
                'random_state' : [2]
                }
grid_cv_dc = GridSearchCV(estimator = dc, param_grid = dc_param_grid, cv = best_cv,
                         n_jobs = 5, scoring = 'f1')
grid_cv_dc.fit(features, labels)
end_gridcv_dc = time.time()
print "Minutes elapsed: " + str((float(end_gridcv_dc - start_gridcv_dc) / 60))

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/Users/bean/anaconda2/lib/python2.7/runpy.py in _run_module_as_main(mod_name='ipykernel.__main__', alter_argv=1)
    169     pkg_name = mod_name.rpartition('.')[0]
    170     main_globals = sys.modules["__main__"].__dict__
    171     if alter_argv:
    172         sys.argv[0] = fname
    173     return _run_code(code, main_globals, None,
--> 174                      "__main__", fname, loader, pkg_name)
        fname = '/Users/bean/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py'
        loader = <pkgutil.ImpLoader instance>
        pkg_name = 'ipykernel'
    175 
    176 def run_module(mod_name, init_globals=None,
    177                run_name=None, alter_sys=False):
    178     """Execute a module's code without importing it

...........................................................................
/Users/bean/anaconda2/lib/python2.7/runpy.py in _run_code(code=<code object <module> at 0x10e5f22b0, file "/Use...2.7/site-packages/ipykernel/__main__.py", line 1>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/Users/bean/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/Users/bean/...python2.7/site-packages/ipykernel/kernelapp.pyc'>}, init_globals=None, mod_name='__main__', mod_fname='/Users/bean/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='ipykernel')
     67         run_globals.update(init_globals)
     68     run_globals.update(__name__ = mod_name,
     69                        __file__ = mod_fname,
     70                        __loader__ = mod_loader,
     71                        __package__ = pkg_name)
---> 72     exec code in run_globals
        code = <code object <module> at 0x10e5f22b0, file "/Use...2.7/site-packages/ipykernel/__main__.py", line 1>
        run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/Users/bean/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/Users/bean/...python2.7/site-packages/ipykernel/kernelapp.pyc'>}
     73     return run_globals
     74 
     75 def _run_module_code(code, init_globals=None,
     76                     mod_name=None, mod_fname=None,

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py in <module>()
      1 if __name__ == '__main__':
      2     from ipykernel import kernelapp as app
----> 3     app.launch_new_instance()

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    472             return self.subapp.start()
    473         if self.poller is not None:
    474             self.poller.start()
    475         self.kernel.start()
    476         try:
--> 477             ioloop.IOLoop.instance().start()
    478         except KeyboardInterrupt:
    479             pass
    480 
    481 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    883                 self._events.update(event_pairs)
    884                 while self._events:
    885                     fd, events = self._events.popitem()
    886                     try:
    887                         fd_obj, handler_func = self._handlers[fd]
--> 888                         handler_func(fd_obj, events)
        handler_func = <function null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 5
    889                     except (OSError, IOError) as e:
    890                         if errno_from_exception(e) == errno.EPIPE:
    891                             # Happens when the client closes the connection
    892                             pass

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 5), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 5)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=5)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'# gridsearchcv for decisiontreeclassifier\n\ns...r((float(end_gridcv_dc - start_gridcv_dc) / 60))', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2017, 10, 8, 8, 17, 48, 275556, tzinfo=tzutc()), u'msg_id': u'DD91690DE13F43F29E338E5FD20BE7AD', u'msg_type': u'execute_request', u'session': u'33F218AE1CD946EC98CA2A6B62AFEC1C', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'DD91690DE13F43F29E338E5FD20BE7AD', 'msg_type': u'execute_request', 'parent_header': {}})
    230             self.log.warn("Unknown message type: %r", msg_type)
    231         else:
    232             self.log.debug("%s: %s", msg_type, msg)
    233             self.pre_handler_hook()
    234             try:
--> 235                 handler(stream, idents, msg)
        handler = <bound method IPythonKernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = ['33F218AE1CD946EC98CA2A6B62AFEC1C']
        msg = {'buffers': [], 'content': {u'allow_stdin': True, u'code': u'# gridsearchcv for decisiontreeclassifier\n\ns...r((float(end_gridcv_dc - start_gridcv_dc) / 60))', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2017, 10, 8, 8, 17, 48, 275556, tzinfo=tzutc()), u'msg_id': u'DD91690DE13F43F29E338E5FD20BE7AD', u'msg_type': u'execute_request', u'session': u'33F218AE1CD946EC98CA2A6B62AFEC1C', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'DD91690DE13F43F29E338E5FD20BE7AD', 'msg_type': u'execute_request', 'parent_header': {}}
    236             except Exception:
    237                 self.log.error("Exception in message handler:", exc_info=True)
    238             finally:
    239                 self.post_handler_hook()

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['33F218AE1CD946EC98CA2A6B62AFEC1C'], parent={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'# gridsearchcv for decisiontreeclassifier\n\ns...r((float(end_gridcv_dc - start_gridcv_dc) / 60))', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2017, 10, 8, 8, 17, 48, 275556, tzinfo=tzutc()), u'msg_id': u'DD91690DE13F43F29E338E5FD20BE7AD', u'msg_type': u'execute_request', u'session': u'33F218AE1CD946EC98CA2A6B62AFEC1C', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'DD91690DE13F43F29E338E5FD20BE7AD', 'msg_type': u'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=u'# gridsearchcv for decisiontreeclassifier\n\ns...r((float(end_gridcv_dc - start_gridcv_dc) / 60))', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    191 
    192         self._forward_input(allow_stdin)
    193 
    194         reply_content = {}
    195         try:
--> 196             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = u'# gridsearchcv for decisiontreeclassifier\n\ns...r((float(end_gridcv_dc - start_gridcv_dc) / 60))'
        store_history = True
        silent = False
    197         finally:
    198             self._restore_input()
    199 
    200         if res.error_before_exec is not None:

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=(u'# gridsearchcv for decisiontreeclassifier\n\ns...r((float(end_gridcv_dc - start_gridcv_dc) / 60))',), **kwargs={'silent': False, 'store_history': True})
    528             )
    529         self.payload_manager.write_payload(payload)
    530 
    531     def run_cell(self, *args, **kwargs):
    532         self._last_traceback = None
--> 533         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = (u'# gridsearchcv for decisiontreeclassifier\n\ns...r((float(end_gridcv_dc - start_gridcv_dc) / 60))',)
        kwargs = {'silent': False, 'store_history': True}
    534 
    535     def _showtraceback(self, etype, evalue, stb):
    536         # try to preserve ordering of tracebacks and print statements
    537         sys.stdout.flush()

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=u'# gridsearchcv for decisiontreeclassifier\n\ns...r((float(end_gridcv_dc - start_gridcv_dc) / 60))', store_history=True, silent=False, shell_futures=True)
   2713                 self.displayhook.exec_result = result
   2714 
   2715                 # Execute the user code
   2716                 interactivity = "none" if silent else self.ast_node_interactivity
   2717                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2718                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler instance>
   2719                 
   2720                 self.last_execution_succeeded = not has_raised
   2721 
   2722                 # Reset this so later displayed values do not modify the

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>, <_ast.Assign object>, <_ast.Print object>], cell_name='<ipython-input-60-443113a9d8f8>', interactivity='none', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<ExecutionResult object at 11a4cacd0, execution_..._before_exec=None error_in_exec=None result=None>)
   2817 
   2818         try:
   2819             for i, node in enumerate(to_run_exec):
   2820                 mod = ast.Module([node])
   2821                 code = compiler(mod, cell_name, "exec")
-> 2822                 if self.run_code(code, result):
        self.run_code = <bound method ZMQInteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x11a6d60b0, file "<ipython-input-60-443113a9d8f8>", line 11>
        result = <ExecutionResult object at 11a4cacd0, execution_..._before_exec=None error_in_exec=None result=None>
   2823                     return True
   2824 
   2825             for i, node in enumerate(to_run_interactive):
   2826                 mod = ast.Interactive([node])

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x11a6d60b0, file "<ipython-input-60-443113a9d8f8>", line 11>, result=<ExecutionResult object at 11a4cacd0, execution_..._before_exec=None error_in_exec=None result=None>)
   2877         outflag = 1  # happens in more places, so it's easier as default
   2878         try:
   2879             try:
   2880                 self.hooks.pre_run_code_hook()
   2881                 #rprint('Running code', repr(code_obj)) # dbg
-> 2882                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x11a6d60b0, file "<ipython-input-60-443113a9d8f8>", line 11>
        self.user_global_ns = {'DecisionTreeClassifier': <class 'sklearn.tree.tree.DecisionTreeClassifier'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', u'#!/usr/bin/python\n\nimport sys\nimport pickle... import accuracy_score\nimport matplotlib.pyplot', u'### Task 1: Select what features you\'ll use.\...s series:\ndf.set_index(employees, inplace=True)', u'# Find how many missing values are in all feat...n        print "Created feature: ", str(feature)', u"# Create another working dataframe to make new...bda x: pd.to_numeric(x, errors='coerce')).copy()", u"# from_msg_ratio is ratio messages received fr...ndex')\n# new feature of bonus to expenses ratio", u'# Fill NaN with 0 where operations created NaN in some rows\ndf_new.fillna(0, inplace = True)', u'# after you create features, the column names ..._list = df_new.columns.values\nnew_features_list', u'### Task 2: Remove outliers\n\n# From the mini...et quirk\ndf_new.drop([\'TOTAL\'], inplace=True)', u"# Once outliers are removed, data values shoul...0 again\ndf_new_scaled.fillna(0, inplace = True)", u"# create a dictionary from the dataframe\ndf_dict = df_new_scaled.to_dict('index')", u'### Task 3: Create new feature(s)\n### Store t...set for easy export below.\nmy_dataset = df_dict', u'### Extract features and labels from dataset f...ue)\nlabels, features = targetFeatureSplit(data)', u"### Task 4: Try a varity of classifiers\n### P... a starting point. Try a variety of classifiers.", u'# First one tried is RandomForestClassifier\nr...on.fit(features_exploration, labels_exploration)', u'# Also trying a decision tree classifier becau...on.fit(features_exploration, labels_exploration)', u'# This function appends the feature and accord...[1], reverse = True)\n    \n    return impt_list', u'# Call previous function to append and sort fe...atures[1:], dc_exploration.feature_importances_)', u'print "RandomForestClassifier importances valu..._impt:\n    print item[0] + " : " + str(item[1])', u'print "DecisionTreeClassifier importances valu..._impt:\n    print item[0] + " : " + str(item[1])', ...], 'MinMaxScaler': <class 'sklearn.preprocessing.data.MinMaxScaler'>, 'Out': {7: array(['bonus', 'deferral_payments', 'deferred_i...n_plus_expenses', 'bon_sal_ratio'], dtype=object), 17: [('restricted_stock', 0.21352158097564938), ('bon_plus_expenses', 0.20789151356080482), ('expenses', 0.18742229744098179), ('exercised_stock_options', 0.1178661654327829), ('other', 0.10873640794900637), ('from_messages', 0.087128532010421783), ('from_this_person_to_poi', 0.054368203974503197), ('to_msg_ratio', 0.023065298655849813), ('salary', 0.0), ('bonus', 0.0), ('long_term_incentive', 0.0), ('deferred_income', 0.0), ('total_payments', 0.0), ('to_messages', 0.0), ('email_address', 0.0), ('from_poi_to_this_person', 0.0), ('shared_receipt_with_poi', 0.0), ('from_msg_ratio', 0.0), ('bon_sal_ratio', 0.0)], 32: {'max_features': 3, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 2}, 33: {'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 8, 'random_state': 2}, 34: DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), 43: array(['bonus', 'deferral_payments', 'deferred_i...n_plus_expenses', 'bon_sal_ratio'], dtype=object), 53: [('expenses', 0.22970867831003985), ('bon_plus_expenses', 0.20789151356080485), ('exercised_stock_options', 0.16860982247565254), ('restricted_stock', 0.12049154306372169), ('other', 0.10873640794900638), ('from_messages', 0.087128532010421797), ('to_msg_ratio', 0.077433502630353007), ('salary', 0.0), ('bonus', 0.0), ('long_term_incentive', 0.0), ('deferred_income', 0.0), ('total_payments', 0.0), ('to_messages', 0.0), ('email_address', 0.0), ('from_poi_to_this_person', 0.0), ('from_this_person_to_poi', 0.0), ('shared_receipt_with_poi', 0.0), ('from_msg_ratio', 0.0), ('bon_sal_ratio', 0.0)]}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'RandomForestClassifier': <class 'sklearn.ensemble.forest.RandomForestClassifier'>, 'RandomizedSearchCV': <class 'sklearn.model_selection._search.RandomizedSearchCV'>, 'SelectKBest': <class 'sklearn.feature_selection.univariate_selection.SelectKBest'>, 'StratifiedShuffleSplit': <class 'sklearn.model_selection._split.StratifiedShuffleSplit'>, ...}
        self.user_ns = {'DecisionTreeClassifier': <class 'sklearn.tree.tree.DecisionTreeClassifier'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', u'#!/usr/bin/python\n\nimport sys\nimport pickle... import accuracy_score\nimport matplotlib.pyplot', u'### Task 1: Select what features you\'ll use.\...s series:\ndf.set_index(employees, inplace=True)', u'# Find how many missing values are in all feat...n        print "Created feature: ", str(feature)', u"# Create another working dataframe to make new...bda x: pd.to_numeric(x, errors='coerce')).copy()", u"# from_msg_ratio is ratio messages received fr...ndex')\n# new feature of bonus to expenses ratio", u'# Fill NaN with 0 where operations created NaN in some rows\ndf_new.fillna(0, inplace = True)', u'# after you create features, the column names ..._list = df_new.columns.values\nnew_features_list', u'### Task 2: Remove outliers\n\n# From the mini...et quirk\ndf_new.drop([\'TOTAL\'], inplace=True)', u"# Once outliers are removed, data values shoul...0 again\ndf_new_scaled.fillna(0, inplace = True)", u"# create a dictionary from the dataframe\ndf_dict = df_new_scaled.to_dict('index')", u'### Task 3: Create new feature(s)\n### Store t...set for easy export below.\nmy_dataset = df_dict', u'### Extract features and labels from dataset f...ue)\nlabels, features = targetFeatureSplit(data)', u"### Task 4: Try a varity of classifiers\n### P... a starting point. Try a variety of classifiers.", u'# First one tried is RandomForestClassifier\nr...on.fit(features_exploration, labels_exploration)', u'# Also trying a decision tree classifier becau...on.fit(features_exploration, labels_exploration)', u'# This function appends the feature and accord...[1], reverse = True)\n    \n    return impt_list', u'# Call previous function to append and sort fe...atures[1:], dc_exploration.feature_importances_)', u'print "RandomForestClassifier importances valu..._impt:\n    print item[0] + " : " + str(item[1])', u'print "DecisionTreeClassifier importances valu..._impt:\n    print item[0] + " : " + str(item[1])', ...], 'MinMaxScaler': <class 'sklearn.preprocessing.data.MinMaxScaler'>, 'Out': {7: array(['bonus', 'deferral_payments', 'deferred_i...n_plus_expenses', 'bon_sal_ratio'], dtype=object), 17: [('restricted_stock', 0.21352158097564938), ('bon_plus_expenses', 0.20789151356080482), ('expenses', 0.18742229744098179), ('exercised_stock_options', 0.1178661654327829), ('other', 0.10873640794900637), ('from_messages', 0.087128532010421783), ('from_this_person_to_poi', 0.054368203974503197), ('to_msg_ratio', 0.023065298655849813), ('salary', 0.0), ('bonus', 0.0), ('long_term_incentive', 0.0), ('deferred_income', 0.0), ('total_payments', 0.0), ('to_messages', 0.0), ('email_address', 0.0), ('from_poi_to_this_person', 0.0), ('shared_receipt_with_poi', 0.0), ('from_msg_ratio', 0.0), ('bon_sal_ratio', 0.0)], 32: {'max_features': 3, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 2}, 33: {'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 8, 'random_state': 2}, 34: DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), 43: array(['bonus', 'deferral_payments', 'deferred_i...n_plus_expenses', 'bon_sal_ratio'], dtype=object), 53: [('expenses', 0.22970867831003985), ('bon_plus_expenses', 0.20789151356080485), ('exercised_stock_options', 0.16860982247565254), ('restricted_stock', 0.12049154306372169), ('other', 0.10873640794900638), ('from_messages', 0.087128532010421797), ('to_msg_ratio', 0.077433502630353007), ('salary', 0.0), ('bonus', 0.0), ('long_term_incentive', 0.0), ('deferred_income', 0.0), ('total_payments', 0.0), ('to_messages', 0.0), ('email_address', 0.0), ('from_poi_to_this_person', 0.0), ('from_this_person_to_poi', 0.0), ('shared_receipt_with_poi', 0.0), ('from_msg_ratio', 0.0), ('bon_sal_ratio', 0.0)]}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'RandomForestClassifier': <class 'sklearn.ensemble.forest.RandomForestClassifier'>, 'RandomizedSearchCV': <class 'sklearn.model_selection._search.RandomizedSearchCV'>, 'SelectKBest': <class 'sklearn.feature_selection.univariate_selection.SelectKBest'>, 'StratifiedShuffleSplit': <class 'sklearn.model_selection._split.StratifiedShuffleSplit'>, ...}
   2883             finally:
   2884                 # Reset our crash handler in place
   2885                 sys.excepthook = old_excepthook
   2886         except SystemExit as e:

...........................................................................
/Users/bean/dand_files/ud120-projects/final_project/<ipython-input-60-443113a9d8f8> in <module>()
      6                  'min_samples_leaf': [1, 2, 3, 10, 20],
      7                 'random_state' : [2]
      8                 }
      9 grid_cv_dc = GridSearchCV(estimator = dc, param_grid = dc_param_grid, cv = best_cv,
     10                          n_jobs = 5, scoring = 'f1')
---> 11 grid_cv_dc.fit(features, labels)
     12 end_gridcv_dc = time.time()
     13 print "Minutes elapsed: " + str((float(end_gridcv_dc - start_gridcv_dc) / 60))

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_search.py in fit(self=GridSearchCV(cv=StratifiedShuffleSplit(n_splits=...train_score=True,
       scoring='f1', verbose=0), X=[array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], y=[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], groups=None, **fit_params={})
    633                                   return_train_score=self.return_train_score,
    634                                   return_n_test_samples=True,
    635                                   return_times=True, return_parameters=False,
    636                                   error_score=self.error_score)
    637           for parameters, (train, test) in product(candidate_params,
--> 638                                                    cv.split(X, y, groups)))
        cv.split = <bound method StratifiedShuffleSplit.split of St...est_size='default',
            train_size=None)>
        X = [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...]
        y = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...]
        groups = None
    639 
    640         # if one choose to see train score, "out" will contain train score info
    641         if self.return_train_score:
    642             (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=5), iterable=<generator object <genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=5)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Sun Oct  8 01:18:04 2017
PID: 7064                   Python 2.7.13: /Users/bean/anaconda2/bin/python
...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([28, 24, 58, 55, 33, 12, 34, 53, 44, 65, 2...  1, 41, 38, 15, 48, 31,  0, 45, 18, 30, 63,  6]), array([67, 20, 50,  8, 13, 54, 37]), 0, {'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 2})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}
        self.items = [(<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([35, 44,  3, 13, 41, 37, 39, 33, 68, 60,  ... 61,  8, 22, 36, 11, 18, 25,  7, 54, 23, 24, 26]), array([17, 55, 31, 34,  2, 29, 58]), 0, {'max_features': 1, 'min_samples_leaf': 20, 'min_samples_split': 50, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([50, 42, 41, 48, 10, 39, 30, 43, 27, 21, 1... 61, 26, 17,  0,  2, 44, 67, 62, 53,  1, 51,  6]), array([55, 64, 68, 59, 23, 29, 49]), 0, {'max_features': 1, 'min_samples_leaf': 20, 'min_samples_split': 50, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([46, 65, 39,  9, 25,  8, 41, 54, 53, 28, 2... 19, 45, 17, 34, 26,  7,  0,  2, 10, 66, 15, 61]), array([37, 59, 33,  6, 14, 69, 21]), 0, {'max_features': 1, 'min_samples_leaf': 20, 'min_samples_split': 50, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([47,  1, 52, 63, 40, 37,  6, 44, 27, 33, 1... 28, 64, 29,  7, 15, 69,  3, 46, 60, 53, 35, 65]), array([58, 10, 59,  0, 68, 22, 48]), 0, {'max_features': 1, 'min_samples_leaf': 20, 'min_samples_split': 50, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([44, 55, 66, 20, 61, 13, 15, 18, 47, 22, 6... 57, 39,  4,  9, 38, 54, 51, 19, 29, 11, 48, 23]), array([45, 41, 36,  1, 16, 64, 14]), 0, {'max_features': 1, 'min_samples_leaf': 20, 'min_samples_split': 50, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([41, 47, 17, 65,  2, 44, 25, 39,  9, 31, 3...  5, 18, 21, 53, 51, 22, 16,  8, 57, 43,  4, 66]), array([67, 37, 69, 33, 56, 32, 58]), 0, {'max_features': 1, 'min_samples_leaf': 20, 'min_samples_split': 50, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([44,  5,  8, 63, 20, 58, 19, 65, 62, 52, 2... 67, 45, 38,  6,  3, 24, 51, 55,  0,  9, 14, 48]), array([53, 39, 66, 60, 69,  4, 35]), 0, {'max_features': 1, 'min_samples_leaf': 20, 'min_samples_split': 50, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([21, 18, 24, 32, 25, 44, 10,  1, 22,  2, 3... 58, 29, 67, 15, 43, 19, 16, 28, 17,  6, 23, 55]), array([ 0, 35, 20, 38, 37, 69, 12]), 0, {'max_features': 1, 'min_samples_leaf': 20, 'min_samples_split': 50, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([61, 56, 64,  4, 60,  9, 34, 30, 65, 18, 1...  8, 14, 12, 24, 51, 67, 47, 15,  5, 40, 54,  1]), array([46, 33, 29, 50, 63, 57, 20]), 0, {'max_features': 1, 'min_samples_leaf': 20, 'min_samples_split': 50, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([52, 58,  1, 67, 59, 57,  3, 65,  7, 54,  ... 49, 41,  8, 20, 45, 39, 21, 63, 14, 29, 64, 23]), array([68, 44, 62, 42,  4, 53, 55]), 0, {'max_features': 1, 'min_samples_leaf': 20, 'min_samples_split': 50, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([28, 24, 58, 55, 33, 12, 34, 53, 44, 65, 2...  1, 41, 38, 15, 48, 31,  0, 45, 18, 30, 63,  6]), array([67, 20, 50,  8, 13, 54, 37]), 0, {'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter..., random_state=None,
            splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([40, 29,  4, 64, 49, 57, 44, 46, 69,  3, 4...  6, 27,  8, 43, 12, 68, 59, 63, 38, 50, 65, 48]), array([ 5, 45, 33,  2,  0, 19, 32]), 0, {'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter..., random_state=None,
            splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([66,  2,  1, 32, 58,  4, 31, 57, 47, 17, 3... 23, 13,  8, 40, 39, 68,  9, 30, 53, 22, 33, 46]), array([16, 48, 61, 43,  5,  0, 64]), 0, {'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter..., random_state=None,
            splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([66, 10,  5,  7, 15, 16, 19, 38,  4, 65, 4... 64, 24, 13, 23, 58, 49, 12, 34, 21, 44, 22, 18]), array([ 6, 11, 67, 42, 20, 53, 54]), 0, {'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter..., random_state=None,
            splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([52, 65, 18, 12, 30, 60, 16, 27, 63,  7, 3... 10, 41, 56, 49, 46,  5, 69, 39, 48, 38, 61, 55]), array([37, 31, 26, 47, 28, 13, 15]), 0, {'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter..., random_state=None,
            splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([60, 19,  4, 65, 31, 28, 33,  9, 11, 27,  ... 10, 66, 14,  0, 40, 50, 38, 46, 68, 17,  5,  3]), array([47, 15, 43, 69, 48, 26, 58]), 0, {'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter..., random_state=None,
            splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([69, 15, 12, 50,  8, 16, 28, 31, 65,  6, 4... 62, 66, 51, 41, 22, 26, 59,  4, 57, 64, 30, 58]), array([20, 44, 23, 17, 34, 38, 13]), 0, {'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter..., random_state=None,
            splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([20, 47, 12, 65, 58, 11, 33,  1, 60, 16, 6...  5, 54,  8, 18, 31, 67, 44, 34, 13, 10, 14, 62]), array([27, 41,  6, 56, 17, 59, 48]), 0, {'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter..., random_state=None,
            splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([56, 10, 17, 12, 43, 39, 45,  3, 54, 21,  ... 57,  2, 18, 35, 11, 26, 52, 68, 41, 62, 31, 27]), array([33, 60, 28, 20, 37, 53, 67]), 0, {'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}), (<function _fit_and_score>, (DecisionTreeClassifier(class_weight=None, criter..., random_state=None,
            splitter='best'), [array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], {'score': make_scorer(f1_score)}, array([17, 21, 11, 61, 31, 56, 45, 35, 57, 52,  ... 37, 32, 30, 27,  7, 36, 12, 68, 62, 53, 48, 50]), array([18, 29, 64, 33, 66, 24, 38]), 0, {'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 2}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator=DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), X=[array([ 0.5203862]), array([ 0.15046828]), array([ 0.06573571]), array([ 0.09157943]), array([ 0.65436733]), array([ 0.03840904]), array([ 0.12902096]), array([ 0.11605736]), array([ 0.17589919]), array([ 0.06218991]), array([ 0.09434207]), array([ 0.15973813]), array([ 0.1280414]), array([ 0.15112844]), array([ 0.10284579]), array([ 0.0431508]), array([ 0.07970968]), array([ 0.3833977]), array([ 0.10573577]), array([ 0.05928975]), ...], y=[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], scorer={'score': make_scorer(f1_score)}, train=array([28, 24, 58, 55, 33, 12, 34, 53, 44, 65, 2...  1, 41, 38, 15, 48, 31,  0, 45, 18, 30, 63,  6]), test=array([67, 20, 50,  8, 13, 54, 37]), verbose=0, parameters={'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 2}, fit_params={}, return_train_score=True, return_parameters=False, return_n_test_samples=True, return_times=True, error_score='raise')
    432 
    433     try:
    434         if y_train is None:
    435             estimator.fit(X_train, **fit_params)
    436         else:
--> 437             estimator.fit(X_train, y_train, **fit_params)
        estimator.fit = <bound method DecisionTreeClassifier.fit of Deci... presort=False, random_state=2, splitter='best')>
        X_train = [array([ 0.16844708]), array([ 0.09637635]), array([ 0.22323644]), array([ 0.0412015]), array([ 0.09018879]), array([ 0.1280414]), array([ 0.15232789]), array([ 0.10109786]), array([ 1.]), array([ 0.06416692]), array([ 0.04644615]), array([ 0.69933662]), array([ 0.11160952]), array([ 0.32249358]), array([ 0.1294426]), array([ 0.14022707]), array([ 0.05684551]), array([ 0.10274094]), array([ 0.0550413]), array([ 0.09786488]), ...]
        y_train = [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...]
        fit_params = {}
    438 
    439     except Exception as e:
    440         # Note fit time as time until error
    441         fit_time = time.time() - start_time

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/sklearn/tree/tree.py in fit(self=DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), X=[array([ 0.16844708]), array([ 0.09637635]), array([ 0.22323644]), array([ 0.0412015]), array([ 0.09018879]), array([ 0.1280414]), array([ 0.15232789]), array([ 0.10109786]), array([ 1.]), array([ 0.06416692]), array([ 0.04644615]), array([ 0.69933662]), array([ 0.11160952]), array([ 0.32249358]), array([ 0.1294426]), array([ 0.14022707]), array([ 0.05684551]), array([ 0.10274094]), array([ 0.0550413]), array([ 0.09786488]), ...], y=[1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...], sample_weight=None, check_input=True, X_idx_sorted=None)
    785 
    786         super(DecisionTreeClassifier, self).fit(
    787             X, y,
    788             sample_weight=sample_weight,
    789             check_input=check_input,
--> 790             X_idx_sorted=X_idx_sorted)
        X_idx_sorted = None
    791         return self
    792 
    793     def predict_proba(self, X, check_input=True):
    794         """Predict class probabilities of the input samples X.

...........................................................................
/Users/bean/anaconda2/lib/python2.7/site-packages/sklearn/tree/tree.py in fit(self=DecisionTreeClassifier(class_weight=None, criter...  presort=False, random_state=2, splitter='best'), X=array([[ 0.16844708],
       [ 0.09637635],
    ....02768905],
       [ 0.12902096]], dtype=float32), y=array([[ 1.],
       [ 0.],
       [ 1.],
      ...[ 0.],
       [ 0.],
       [ 1.],
       [ 0.]]), sample_weight=None, check_input=True, X_idx_sorted=None)
    237         if not 0 <= self.min_weight_fraction_leaf <= 0.5:
    238             raise ValueError("min_weight_fraction_leaf must in [0, 0.5]")
    239         if max_depth <= 0:
    240             raise ValueError("max_depth must be greater than zero. ")
    241         if not (0 < max_features <= self.n_features_):
--> 242             raise ValueError("max_features must be in (0, n_features]")
    243         if not isinstance(max_leaf_nodes, (numbers.Integral, np.integer)):
    244             raise ValueError("max_leaf_nodes must be integral number but was "
    245                              "%r" % max_leaf_nodes)
    246         if -1 < max_leaf_nodes < 2:

ValueError: max_features must be in (0, n_features]
___________________________________________________________________________

In [None]:
# Try RandomSearchCV instead as an option for reviewer
from scipy.stats import randint as sp_randint
start_rcv_dc = time.time()
param_dist = { 'min_samples_split': sp_randint(2,10),
              'max_features' : sp_randint(1,5),
              'min_samples_leaf': sp_randint(1,5),
              'random_state': [2]
    
}
rcv_dc = RandomizedSearchCV(estimator = dc, param_distributions = param_dist, 
                           cv = best_cv, scoring = 'f1', n_jobs = 3, n_iter = 20)
rcv_dc.fit(features_test, labels_test)
end_rcv_dc = time.time()
print "Minutes elapsed: " + str((float(end_rcv_dc - start_rcv_dc) / 60))

In [None]:
#print classification_report(labels_train, grid_cv_rfc.best_estimator_.predict(features_train))

In [None]:
print classification_report(labels_train, grid_cv_dc.best_estimator_.predict(features_train))

In [None]:
#print classification_report(labels_test, grid_cv_rfc.best_estimator_.predict(features_test))

In [None]:
print classification_report(labels_test, grid_cv_dc.best_estimator_.predict(features_test))

In [None]:
#grid_cv_rfc.best_params_

In [None]:
# Assign clf to classifer chosen after testing with tester.py
# Parameters are selected from GridSearchCV's best_params_ attributes

#clf = RandomForestClassifier(min_samples_split = 5, n_estimators = 3,
#                            random_state = 2, max_depth = 50, min_samples_leaf = 1,
#                            max_features = 1)
#clf.fit(features, labels)

In [None]:
grid_cv_dc.best_params_

In [None]:
rcv_dc.best_params_

In [62]:
# Parameters are selected from GridSearchCV's best_params_ attributes
# I ended up choosing DecisionTreeClassifier because it performed better with
# precision and recall in tester.py
clf = DecisionTreeClassifier(min_samples_split = 2, random_state = 2,
                            max_features = 1, min_samples_leaf = 1)
clf.fit(features, labels)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=1, max_leaf_nodes=None, min_impurity_decrease=0.0,
            min_impurity_split=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=2, splitter='best')

In [75]:
from sklearn.metrics import f1_score
labels_pred = clf.predict(features_test)
f1_score(labels_test, labels_test, labels_pred)

1.0

In [63]:
### Task 6: Dump your classifier, dataset, and features_list so anyone can
### check your results. You do not need to change anything below, but make sure
### that the version of poi_id.py that you submit can be run on its own and
### generates the necessary .pkl files for validating your results.

dump_classifier_and_data(clf, my_dataset, features_list)
