In [13]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, Lasso
from sklearn.preprocessing import MinMaxScaler, StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, get_scorer_names, confusion_matrix, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.ensemble import HistGradientBoostingClassifier, ExtraTreesClassifier
from sklearn.naive_bayes import GaussianNB, CategoricalNB
from xgboost import XGBClassifier
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from statistics import mode as md

In [14]:
X_test = pd.read_csv("../tanzanian_water_wells/X_test.csv")
X_train = pd.read_csv("../tanzanian_water_wells/X_train.csv")
y_train = pd.read_csv("../tanzanian_water_wells/y_train.csv")

df = pd.concat([X_train, y_train], axis=1)

In [15]:
desc = {'amount_tsh': 'Total static head (amount water available to waterpoint)',
                    'date_recorded': 'The date the row was entered',
                    'funder': 'Who funded the well',
                    'gps_height': 'Altitude of the well',
                    'installer': 'Organization that installed the well',
                    'longitude': 'GPS coordinate',
                    'latitude': 'GPS coordinate',
                    'wpt_name': 'Name of the waterpoint if there is one',
                    'subvillage': 'Geographic location',
                    'region': 'Geographic location',
                    'region_code': 'Geographic location (coded)',
                    'district_code': 'Geographic location (coded)',
                    'lga': 'Geographic location',
                    'ward': 'Geographic location',
                    'population': 'Population around the well',
                    'public_meeting': 'True/False',
                    'recorded_by': 'Group entering this row of data',
                    'scheme_management': 'Who operates the waterpoint',
                    'scheme_name': 'Who operates the waterpoint',
                    'permit': 'If the waterpoint is permitted',
                    'construction_year': 'Year the waterpoint was constructed',
                    'extraction_type': 'The kind of extraction the waterpoint uses',
                    'extraction_type_group': 'The kind of extraction the waterpoint uses',
                    'extraction_type_class': 'The kind of extraction the waterpoint uses',
                    'management': 'How the waterpoint is managed',
                    'management_group': 'How the waterpoint is managed',
                    'payment': 'What the water costs',
                    'payment_type': 'What the water costs',
                    'water_quality': 'The quality of the water',
                    'quality_group': 'The quality of the water',
                    'quantity': 'The quantity of water',
                    'quantity_group': 'The quantity of water',
                    'source': 'The source of the water',
                    'source_type': 'The source of the water',
                    'source_class': 'The source of the water',
                    'waterpoint_type': 'The kind of waterpoint',
                    'waterpoint_type_group': 'The kind of waterpoint'}

In [16]:
# Eliminating null values

df.funder.fillna("Unknown", inplace=True)
df.installer.fillna("Unknown", inplace=True)
df.scheme_management.fillna("None", inplace=True)
df.permit.fillna('Unknown', inplace=True)
df.scheme_name.fillna('Unknown', inplace=True)
df.subvillage.fillna('Unknown', inplace=True)
df.public_meeting.fillna('Unknown', inplace=True)

# Defining the train and test sets

In [17]:
X = df.copy()

columns = ['amount_tsh', 'gps_height', 'population', 'region', 'lga', 
           'scheme_management', 'permit', 'construction_year',
           'extraction_type_group', 'payment', 'management', 
           'quality_group', 'quantity', 'source', 'waterpoint_type']

X = X[columns]

# X['public_meeting'] = X['public_meeting'].map({True: 'Yes', False: 'No', 'Unknown': 'Unknown'})
X['permit'] = X['permit'].map({True: 'Yes', False: 'No', 'Unknown': 'Unknown'})
X['gps_height'] = X['gps_height'].astype('float64')
# X['district_code'] = X['district_code'].astype('float64')
X['population'] = X['population'].astype('float64')
# X['district_code'] = X['district_code'].astype('object')

X_cat = X.drop(list(X.select_dtypes(['float64']).columns), axis=1)
X_numeric = X[list(X.select_dtypes(['float64']).columns)]

y = df['status_group']

X_cat = pd.get_dummies(X_cat)

X = pd.concat([X_numeric, X_cat], axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y)

scaler = StandardScaler()
scaler.fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train),
                index = X_train.index,
                columns = X_train.columns)
X_test = pd.DataFrame(scaler.transform(X_test),
                index = X_test.index,
                columns = X_test.columns)

X_train.reset_index(inplace=True, drop=True)
y_train = y_train.reset_index(drop=True)

In [18]:
X_train_sample = X_train.sample(n=1000, random_state=42)
y_train_sample = y_train.iloc[list(X_train_sample.index)]

# Decision Tree

In [21]:
dtc = DecisionTreeClassifier(random_state=42)

In [40]:
param_grid = {
    'min_samples_split': [1, 5, 10, 20, 40],
    'min_samples_leaf': [1, 5, 10, 20],  
    'max_features': [5, 10, 50, 100, None], 
    'max_depth': [1, 3, 5, 7, 10, None]
}

gs_dtc = GridSearchCV(dtc, param_grid, cv=3, verbose=10)
gs_dtc.fit(X_train, y_train)

Fitting 3 folds for each of 600 candidates, totalling 1800 fits
[CV 1/3; 1/600] START max_depth=1, max_features=5, min_samples_leaf=1, min_samples_split=1
[CV 1/3; 1/600] END max_depth=1, max_features=5, min_samples_leaf=1, min_samples_split=1;, score=nan total time=   0.0s
[CV 2/3; 1/600] START max_depth=1, max_features=5, min_samples_leaf=1, min_samples_split=1
[CV 2/3; 1/600] END max_depth=1, max_features=5, min_samples_leaf=1, min_samples_split=1;, score=nan total time=   0.0s
[CV 3/3; 1/600] START max_depth=1, max_features=5, min_samples_leaf=1, min_samples_split=1
[CV 3/3; 1/600] END max_depth=1, max_features=5, min_samples_leaf=1, min_samples_split=1;, score=nan total time=   0.0s
[CV 1/3; 2/600] START max_depth=1, max_features=5, min_samples_leaf=1, min_samples_split=5
[CV 1/3; 2/600] END max_depth=1, max_features=5, min_samples_leaf=1, min_samples_split=5;, score=0.543 total time=   0.0s
[CV 2/3; 2/600] START max_depth=1, max_features=5, min_samples_leaf=1, min_samples_split=5

[CV 2/3; 15/600] END max_depth=1, max_features=5, min_samples_leaf=10, min_samples_split=40;, score=0.543 total time=   0.0s
[CV 3/3; 15/600] START max_depth=1, max_features=5, min_samples_leaf=10, min_samples_split=40
[CV 3/3; 15/600] END max_depth=1, max_features=5, min_samples_leaf=10, min_samples_split=40;, score=0.543 total time=   0.0s
[CV 1/3; 16/600] START max_depth=1, max_features=5, min_samples_leaf=20, min_samples_split=1
[CV 1/3; 16/600] END max_depth=1, max_features=5, min_samples_leaf=20, min_samples_split=1;, score=nan total time=   0.0s
[CV 2/3; 16/600] START max_depth=1, max_features=5, min_samples_leaf=20, min_samples_split=1
[CV 2/3; 16/600] END max_depth=1, max_features=5, min_samples_leaf=20, min_samples_split=1;, score=nan total time=   0.0s
[CV 3/3; 16/600] START max_depth=1, max_features=5, min_samples_leaf=20, min_samples_split=1
[CV 3/3; 16/600] END max_depth=1, max_features=5, min_samples_leaf=20, min_samples_split=1;, score=nan total time=   0.0s
[CV 1/3; 17

[CV 1/3; 28/600] END max_depth=1, max_features=10, min_samples_leaf=5, min_samples_split=10;, score=0.557 total time=   0.0s
[CV 2/3; 28/600] START max_depth=1, max_features=10, min_samples_leaf=5, min_samples_split=10
[CV 2/3; 28/600] END max_depth=1, max_features=10, min_samples_leaf=5, min_samples_split=10;, score=0.543 total time=   0.0s
[CV 3/3; 28/600] START max_depth=1, max_features=10, min_samples_leaf=5, min_samples_split=10
[CV 3/3; 28/600] END max_depth=1, max_features=10, min_samples_leaf=5, min_samples_split=10;, score=0.544 total time=   0.0s
[CV 1/3; 29/600] START max_depth=1, max_features=10, min_samples_leaf=5, min_samples_split=20
[CV 1/3; 29/600] END max_depth=1, max_features=10, min_samples_leaf=5, min_samples_split=20;, score=0.596 total time=   0.0s
[CV 2/3; 29/600] START max_depth=1, max_features=10, min_samples_leaf=5, min_samples_split=20
[CV 2/3; 29/600] END max_depth=1, max_features=10, min_samples_leaf=5, min_samples_split=20;, score=0.543 total time=   0.0s

[CV 1/3; 42/600] END max_depth=1, max_features=50, min_samples_leaf=1, min_samples_split=5;, score=0.543 total time=   0.0s
[CV 2/3; 42/600] START max_depth=1, max_features=50, min_samples_leaf=1, min_samples_split=5
[CV 2/3; 42/600] END max_depth=1, max_features=50, min_samples_leaf=1, min_samples_split=5;, score=0.543 total time=   0.0s
[CV 3/3; 42/600] START max_depth=1, max_features=50, min_samples_leaf=1, min_samples_split=5
[CV 3/3; 42/600] END max_depth=1, max_features=50, min_samples_leaf=1, min_samples_split=5;, score=0.543 total time=   0.0s
[CV 1/3; 43/600] START max_depth=1, max_features=50, min_samples_leaf=1, min_samples_split=10
[CV 1/3; 43/600] END max_depth=1, max_features=50, min_samples_leaf=1, min_samples_split=10;, score=0.596 total time=   0.0s
[CV 2/3; 43/600] START max_depth=1, max_features=50, min_samples_leaf=1, min_samples_split=10
[CV 2/3; 43/600] END max_depth=1, max_features=50, min_samples_leaf=1, min_samples_split=10;, score=0.613 total time=   0.0s
[CV 

[CV 1/3; 55/600] END max_depth=1, max_features=50, min_samples_leaf=10, min_samples_split=40;, score=0.640 total time=   0.0s
[CV 2/3; 55/600] START max_depth=1, max_features=50, min_samples_leaf=10, min_samples_split=40
[CV 2/3; 55/600] END max_depth=1, max_features=50, min_samples_leaf=10, min_samples_split=40;, score=0.618 total time=   0.0s
[CV 3/3; 55/600] START max_depth=1, max_features=50, min_samples_leaf=10, min_samples_split=40
[CV 3/3; 55/600] END max_depth=1, max_features=50, min_samples_leaf=10, min_samples_split=40;, score=0.543 total time=   0.0s
[CV 1/3; 56/600] START max_depth=1, max_features=50, min_samples_leaf=20, min_samples_split=1
[CV 1/3; 56/600] END max_depth=1, max_features=50, min_samples_leaf=20, min_samples_split=1;, score=nan total time=   0.0s
[CV 2/3; 56/600] START max_depth=1, max_features=50, min_samples_leaf=20, min_samples_split=1
[CV 2/3; 56/600] END max_depth=1, max_features=50, min_samples_leaf=20, min_samples_split=1;, score=nan total time=   0.0

[CV 1/3; 68/600] END max_depth=1, max_features=100, min_samples_leaf=5, min_samples_split=10;, score=0.596 total time=   0.1s
[CV 2/3; 68/600] START max_depth=1, max_features=100, min_samples_leaf=5, min_samples_split=10
[CV 2/3; 68/600] END max_depth=1, max_features=100, min_samples_leaf=5, min_samples_split=10;, score=0.618 total time=   0.0s
[CV 3/3; 68/600] START max_depth=1, max_features=100, min_samples_leaf=5, min_samples_split=10
[CV 3/3; 68/600] END max_depth=1, max_features=100, min_samples_leaf=5, min_samples_split=10;, score=0.606 total time=   0.1s
[CV 1/3; 69/600] START max_depth=1, max_features=100, min_samples_leaf=5, min_samples_split=20
[CV 1/3; 69/600] END max_depth=1, max_features=100, min_samples_leaf=5, min_samples_split=20;, score=0.640 total time=   0.1s
[CV 2/3; 69/600] START max_depth=1, max_features=100, min_samples_leaf=5, min_samples_split=20
[CV 2/3; 69/600] END max_depth=1, max_features=100, min_samples_leaf=5, min_samples_split=20;, score=0.596 total tim

[CV 2/3; 80/600] END max_depth=1, max_features=100, min_samples_leaf=20, min_samples_split=40;, score=0.642 total time=   0.1s
[CV 3/3; 80/600] START max_depth=1, max_features=100, min_samples_leaf=20, min_samples_split=40
[CV 3/3; 80/600] END max_depth=1, max_features=100, min_samples_leaf=20, min_samples_split=40;, score=0.593 total time=   0.1s
[CV 1/3; 81/600] START max_depth=1, max_features=None, min_samples_leaf=1, min_samples_split=1
[CV 1/3; 81/600] END max_depth=1, max_features=None, min_samples_leaf=1, min_samples_split=1;, score=nan total time=   0.0s
[CV 2/3; 81/600] START max_depth=1, max_features=None, min_samples_leaf=1, min_samples_split=1
[CV 2/3; 81/600] END max_depth=1, max_features=None, min_samples_leaf=1, min_samples_split=1;, score=nan total time=   0.0s
[CV 3/3; 81/600] START max_depth=1, max_features=None, min_samples_leaf=1, min_samples_split=1
[CV 3/3; 81/600] END max_depth=1, max_features=None, min_samples_leaf=1, min_samples_split=1;, score=nan total time= 

[CV 1/3; 93/600] END max_depth=1, max_features=None, min_samples_leaf=10, min_samples_split=10;, score=0.640 total time=   0.1s
[CV 2/3; 93/600] START max_depth=1, max_features=None, min_samples_leaf=10, min_samples_split=10
[CV 2/3; 93/600] END max_depth=1, max_features=None, min_samples_leaf=10, min_samples_split=10;, score=0.642 total time=   0.1s
[CV 3/3; 93/600] START max_depth=1, max_features=None, min_samples_leaf=10, min_samples_split=10
[CV 3/3; 93/600] END max_depth=1, max_features=None, min_samples_leaf=10, min_samples_split=10;, score=0.647 total time=   0.1s
[CV 1/3; 94/600] START max_depth=1, max_features=None, min_samples_leaf=10, min_samples_split=20
[CV 1/3; 94/600] END max_depth=1, max_features=None, min_samples_leaf=10, min_samples_split=20;, score=0.640 total time=   0.1s
[CV 2/3; 94/600] START max_depth=1, max_features=None, min_samples_leaf=10, min_samples_split=20
[CV 2/3; 94/600] END max_depth=1, max_features=None, min_samples_leaf=10, min_samples_split=20;, sco

[CV 2/3; 106/600] END max_depth=3, max_features=5, min_samples_leaf=5, min_samples_split=1;, score=nan total time=   0.0s
[CV 3/3; 106/600] START max_depth=3, max_features=5, min_samples_leaf=5, min_samples_split=1
[CV 3/3; 106/600] END max_depth=3, max_features=5, min_samples_leaf=5, min_samples_split=1;, score=nan total time=   0.0s
[CV 1/3; 107/600] START max_depth=3, max_features=5, min_samples_leaf=5, min_samples_split=5
[CV 1/3; 107/600] END max_depth=3, max_features=5, min_samples_leaf=5, min_samples_split=5;, score=0.552 total time=   0.0s
[CV 2/3; 107/600] START max_depth=3, max_features=5, min_samples_leaf=5, min_samples_split=5
[CV 2/3; 107/600] END max_depth=3, max_features=5, min_samples_leaf=5, min_samples_split=5;, score=0.562 total time=   0.0s
[CV 3/3; 107/600] START max_depth=3, max_features=5, min_samples_leaf=5, min_samples_split=5
[CV 3/3; 107/600] END max_depth=3, max_features=5, min_samples_leaf=5, min_samples_split=5;, score=0.578 total time=   0.0s
[CV 1/3; 108

[CV 3/3; 119/600] END max_depth=3, max_features=5, min_samples_leaf=20, min_samples_split=20;, score=0.554 total time=   0.0s
[CV 1/3; 120/600] START max_depth=3, max_features=5, min_samples_leaf=20, min_samples_split=40
[CV 1/3; 120/600] END max_depth=3, max_features=5, min_samples_leaf=20, min_samples_split=40;, score=0.557 total time=   0.0s
[CV 2/3; 120/600] START max_depth=3, max_features=5, min_samples_leaf=20, min_samples_split=40
[CV 2/3; 120/600] END max_depth=3, max_features=5, min_samples_leaf=20, min_samples_split=40;, score=0.563 total time=   0.0s
[CV 3/3; 120/600] START max_depth=3, max_features=5, min_samples_leaf=20, min_samples_split=40
[CV 3/3; 120/600] END max_depth=3, max_features=5, min_samples_leaf=20, min_samples_split=40;, score=0.562 total time=   0.0s
[CV 1/3; 121/600] START max_depth=3, max_features=10, min_samples_leaf=1, min_samples_split=1
[CV 1/3; 121/600] END max_depth=3, max_features=10, min_samples_leaf=1, min_samples_split=1;, score=nan total time=  

[CV 2/3; 133/600] END max_depth=3, max_features=10, min_samples_leaf=10, min_samples_split=10;, score=0.631 total time=   0.0s
[CV 3/3; 133/600] START max_depth=3, max_features=10, min_samples_leaf=10, min_samples_split=10
[CV 3/3; 133/600] END max_depth=3, max_features=10, min_samples_leaf=10, min_samples_split=10;, score=0.607 total time=   0.0s
[CV 1/3; 134/600] START max_depth=3, max_features=10, min_samples_leaf=10, min_samples_split=20
[CV 1/3; 134/600] END max_depth=3, max_features=10, min_samples_leaf=10, min_samples_split=20;, score=0.563 total time=   0.0s
[CV 2/3; 134/600] START max_depth=3, max_features=10, min_samples_leaf=10, min_samples_split=20
[CV 2/3; 134/600] END max_depth=3, max_features=10, min_samples_leaf=10, min_samples_split=20;, score=0.625 total time=   0.0s
[CV 3/3; 134/600] START max_depth=3, max_features=10, min_samples_leaf=10, min_samples_split=20
[CV 3/3; 134/600] END max_depth=3, max_features=10, min_samples_leaf=10, min_samples_split=20;, score=0.553 

[CV 1/3; 148/600] END max_depth=3, max_features=50, min_samples_leaf=5, min_samples_split=10;, score=0.695 total time=   0.1s
[CV 2/3; 148/600] START max_depth=3, max_features=50, min_samples_leaf=5, min_samples_split=10
[CV 2/3; 148/600] END max_depth=3, max_features=50, min_samples_leaf=5, min_samples_split=10;, score=0.690 total time=   0.1s
[CV 3/3; 148/600] START max_depth=3, max_features=50, min_samples_leaf=5, min_samples_split=10
[CV 3/3; 148/600] END max_depth=3, max_features=50, min_samples_leaf=5, min_samples_split=10;, score=0.615 total time=   0.1s
[CV 1/3; 149/600] START max_depth=3, max_features=50, min_samples_leaf=5, min_samples_split=20
[CV 1/3; 149/600] END max_depth=3, max_features=50, min_samples_leaf=5, min_samples_split=20;, score=0.656 total time=   0.1s
[CV 2/3; 149/600] START max_depth=3, max_features=50, min_samples_leaf=5, min_samples_split=20
[CV 2/3; 149/600] END max_depth=3, max_features=50, min_samples_leaf=5, min_samples_split=20;, score=0.689 total tim

[CV 3/3; 160/600] END max_depth=3, max_features=50, min_samples_leaf=20, min_samples_split=40;, score=0.649 total time=   0.1s
[CV 1/3; 161/600] START max_depth=3, max_features=100, min_samples_leaf=1, min_samples_split=1
[CV 1/3; 161/600] END max_depth=3, max_features=100, min_samples_leaf=1, min_samples_split=1;, score=nan total time=   0.0s
[CV 2/3; 161/600] START max_depth=3, max_features=100, min_samples_leaf=1, min_samples_split=1
[CV 2/3; 161/600] END max_depth=3, max_features=100, min_samples_leaf=1, min_samples_split=1;, score=nan total time=   0.0s
[CV 3/3; 161/600] START max_depth=3, max_features=100, min_samples_leaf=1, min_samples_split=1
[CV 3/3; 161/600] END max_depth=3, max_features=100, min_samples_leaf=1, min_samples_split=1;, score=nan total time=   0.0s
[CV 1/3; 162/600] START max_depth=3, max_features=100, min_samples_leaf=1, min_samples_split=5
[CV 1/3; 162/600] END max_depth=3, max_features=100, min_samples_leaf=1, min_samples_split=5;, score=0.695 total time=   

[CV 2/3; 173/600] END max_depth=3, max_features=100, min_samples_leaf=10, min_samples_split=10;, score=0.693 total time=   0.1s
[CV 3/3; 173/600] START max_depth=3, max_features=100, min_samples_leaf=10, min_samples_split=10
[CV 3/3; 173/600] END max_depth=3, max_features=100, min_samples_leaf=10, min_samples_split=10;, score=0.680 total time=   0.1s
[CV 1/3; 174/600] START max_depth=3, max_features=100, min_samples_leaf=10, min_samples_split=20
[CV 1/3; 174/600] END max_depth=3, max_features=100, min_samples_leaf=10, min_samples_split=20;, score=0.692 total time=   0.1s
[CV 2/3; 174/600] START max_depth=3, max_features=100, min_samples_leaf=10, min_samples_split=20
[CV 2/3; 174/600] END max_depth=3, max_features=100, min_samples_leaf=10, min_samples_split=20;, score=0.694 total time=   0.1s
[CV 3/3; 174/600] START max_depth=3, max_features=100, min_samples_leaf=10, min_samples_split=20
[CV 3/3; 174/600] END max_depth=3, max_features=100, min_samples_leaf=10, min_samples_split=20;, sco

[CV 1/3; 187/600] END max_depth=3, max_features=None, min_samples_leaf=5, min_samples_split=5;, score=0.696 total time=   0.1s
[CV 2/3; 187/600] START max_depth=3, max_features=None, min_samples_leaf=5, min_samples_split=5
[CV 2/3; 187/600] END max_depth=3, max_features=None, min_samples_leaf=5, min_samples_split=5;, score=0.693 total time=   0.1s
[CV 3/3; 187/600] START max_depth=3, max_features=None, min_samples_leaf=5, min_samples_split=5
[CV 3/3; 187/600] END max_depth=3, max_features=None, min_samples_leaf=5, min_samples_split=5;, score=0.694 total time=   0.1s
[CV 1/3; 188/600] START max_depth=3, max_features=None, min_samples_leaf=5, min_samples_split=10
[CV 1/3; 188/600] END max_depth=3, max_features=None, min_samples_leaf=5, min_samples_split=10;, score=0.696 total time=   0.1s
[CV 2/3; 188/600] START max_depth=3, max_features=None, min_samples_leaf=5, min_samples_split=10
[CV 2/3; 188/600] END max_depth=3, max_features=None, min_samples_leaf=5, min_samples_split=10;, score=0.

[CV 2/3; 199/600] END max_depth=3, max_features=None, min_samples_leaf=20, min_samples_split=20;, score=0.693 total time=   0.1s
[CV 3/3; 199/600] START max_depth=3, max_features=None, min_samples_leaf=20, min_samples_split=20
[CV 3/3; 199/600] END max_depth=3, max_features=None, min_samples_leaf=20, min_samples_split=20;, score=0.694 total time=   0.1s
[CV 1/3; 200/600] START max_depth=3, max_features=None, min_samples_leaf=20, min_samples_split=40
[CV 1/3; 200/600] END max_depth=3, max_features=None, min_samples_leaf=20, min_samples_split=40;, score=0.696 total time=   0.1s
[CV 2/3; 200/600] START max_depth=3, max_features=None, min_samples_leaf=20, min_samples_split=40
[CV 2/3; 200/600] END max_depth=3, max_features=None, min_samples_leaf=20, min_samples_split=40;, score=0.693 total time=   0.1s
[CV 3/3; 200/600] START max_depth=3, max_features=None, min_samples_leaf=20, min_samples_split=40
[CV 3/3; 200/600] END max_depth=3, max_features=None, min_samples_leaf=20, min_samples_split

[CV 1/3; 212/600] END max_depth=5, max_features=5, min_samples_leaf=10, min_samples_split=5;, score=0.574 total time=   0.0s
[CV 2/3; 212/600] START max_depth=5, max_features=5, min_samples_leaf=10, min_samples_split=5
[CV 2/3; 212/600] END max_depth=5, max_features=5, min_samples_leaf=10, min_samples_split=5;, score=0.608 total time=   0.0s
[CV 3/3; 212/600] START max_depth=5, max_features=5, min_samples_leaf=10, min_samples_split=5
[CV 3/3; 212/600] END max_depth=5, max_features=5, min_samples_leaf=10, min_samples_split=5;, score=0.590 total time=   0.0s
[CV 1/3; 213/600] START max_depth=5, max_features=5, min_samples_leaf=10, min_samples_split=10
[CV 1/3; 213/600] END max_depth=5, max_features=5, min_samples_leaf=10, min_samples_split=10;, score=0.627 total time=   0.0s
[CV 2/3; 213/600] START max_depth=5, max_features=5, min_samples_leaf=10, min_samples_split=10
[CV 2/3; 213/600] END max_depth=5, max_features=5, min_samples_leaf=10, min_samples_split=10;, score=0.559 total time=   

[CV 1/3; 225/600] END max_depth=5, max_features=10, min_samples_leaf=1, min_samples_split=40;, score=0.657 total time=   0.0s
[CV 2/3; 225/600] START max_depth=5, max_features=10, min_samples_leaf=1, min_samples_split=40
[CV 2/3; 225/600] END max_depth=5, max_features=10, min_samples_leaf=1, min_samples_split=40;, score=0.616 total time=   0.0s
[CV 3/3; 225/600] START max_depth=5, max_features=10, min_samples_leaf=1, min_samples_split=40
[CV 3/3; 225/600] END max_depth=5, max_features=10, min_samples_leaf=1, min_samples_split=40;, score=0.618 total time=   0.0s
[CV 1/3; 226/600] START max_depth=5, max_features=10, min_samples_leaf=5, min_samples_split=1
[CV 1/3; 226/600] END max_depth=5, max_features=10, min_samples_leaf=5, min_samples_split=1;, score=nan total time=   0.0s
[CV 2/3; 226/600] START max_depth=5, max_features=10, min_samples_leaf=5, min_samples_split=1
[CV 2/3; 226/600] END max_depth=5, max_features=10, min_samples_leaf=5, min_samples_split=1;, score=nan total time=   0.0

[CV 3/3; 238/600] END max_depth=5, max_features=10, min_samples_leaf=20, min_samples_split=10;, score=0.617 total time=   0.0s
[CV 1/3; 239/600] START max_depth=5, max_features=10, min_samples_leaf=20, min_samples_split=20
[CV 1/3; 239/600] END max_depth=5, max_features=10, min_samples_leaf=20, min_samples_split=20;, score=0.578 total time=   0.0s
[CV 2/3; 239/600] START max_depth=5, max_features=10, min_samples_leaf=20, min_samples_split=20
[CV 2/3; 239/600] END max_depth=5, max_features=10, min_samples_leaf=20, min_samples_split=20;, score=0.609 total time=   0.0s
[CV 3/3; 239/600] START max_depth=5, max_features=10, min_samples_leaf=20, min_samples_split=20
[CV 3/3; 239/600] END max_depth=5, max_features=10, min_samples_leaf=20, min_samples_split=20;, score=0.623 total time=   0.0s
[CV 1/3; 240/600] START max_depth=5, max_features=10, min_samples_leaf=20, min_samples_split=40
[CV 1/3; 240/600] END max_depth=5, max_features=10, min_samples_leaf=20, min_samples_split=40;, score=0.575 

[CV 3/3; 252/600] END max_depth=5, max_features=50, min_samples_leaf=10, min_samples_split=5;, score=0.699 total time=   0.1s
[CV 1/3; 253/600] START max_depth=5, max_features=50, min_samples_leaf=10, min_samples_split=10
[CV 1/3; 253/600] END max_depth=5, max_features=50, min_samples_leaf=10, min_samples_split=10;, score=0.686 total time=   0.1s
[CV 2/3; 253/600] START max_depth=5, max_features=50, min_samples_leaf=10, min_samples_split=10
[CV 2/3; 253/600] END max_depth=5, max_features=50, min_samples_leaf=10, min_samples_split=10;, score=0.688 total time=   0.1s
[CV 3/3; 253/600] START max_depth=5, max_features=50, min_samples_leaf=10, min_samples_split=10
[CV 3/3; 253/600] END max_depth=5, max_features=50, min_samples_leaf=10, min_samples_split=10;, score=0.666 total time=   0.1s
[CV 1/3; 254/600] START max_depth=5, max_features=50, min_samples_leaf=10, min_samples_split=20
[CV 1/3; 254/600] END max_depth=5, max_features=50, min_samples_leaf=10, min_samples_split=20;, score=0.645 t

[CV 1/3; 265/600] END max_depth=5, max_features=100, min_samples_leaf=1, min_samples_split=40;, score=0.700 total time=   0.1s
[CV 2/3; 265/600] START max_depth=5, max_features=100, min_samples_leaf=1, min_samples_split=40
[CV 2/3; 265/600] END max_depth=5, max_features=100, min_samples_leaf=1, min_samples_split=40;, score=0.698 total time=   0.1s
[CV 3/3; 265/600] START max_depth=5, max_features=100, min_samples_leaf=1, min_samples_split=40
[CV 3/3; 265/600] END max_depth=5, max_features=100, min_samples_leaf=1, min_samples_split=40;, score=0.713 total time=   0.1s
[CV 1/3; 266/600] START max_depth=5, max_features=100, min_samples_leaf=5, min_samples_split=1
[CV 1/3; 266/600] END max_depth=5, max_features=100, min_samples_leaf=5, min_samples_split=1;, score=nan total time=   0.0s
[CV 2/3; 266/600] START max_depth=5, max_features=100, min_samples_leaf=5, min_samples_split=1
[CV 2/3; 266/600] END max_depth=5, max_features=100, min_samples_leaf=5, min_samples_split=1;, score=nan total ti

[CV 3/3; 277/600] END max_depth=5, max_features=100, min_samples_leaf=20, min_samples_split=5;, score=0.706 total time=   0.1s
[CV 1/3; 278/600] START max_depth=5, max_features=100, min_samples_leaf=20, min_samples_split=10
[CV 1/3; 278/600] END max_depth=5, max_features=100, min_samples_leaf=20, min_samples_split=10;, score=0.707 total time=   0.1s
[CV 2/3; 278/600] START max_depth=5, max_features=100, min_samples_leaf=20, min_samples_split=10
[CV 2/3; 278/600] END max_depth=5, max_features=100, min_samples_leaf=20, min_samples_split=10;, score=0.705 total time=   0.1s
[CV 3/3; 278/600] START max_depth=5, max_features=100, min_samples_leaf=20, min_samples_split=10
[CV 3/3; 278/600] END max_depth=5, max_features=100, min_samples_leaf=20, min_samples_split=10;, score=0.711 total time=   0.1s
[CV 1/3; 279/600] START max_depth=5, max_features=100, min_samples_leaf=20, min_samples_split=20
[CV 1/3; 279/600] END max_depth=5, max_features=100, min_samples_leaf=20, min_samples_split=20;, scor

[CV 1/3; 290/600] END max_depth=5, max_features=None, min_samples_leaf=5, min_samples_split=40;, score=0.710 total time=   0.1s
[CV 2/3; 290/600] START max_depth=5, max_features=None, min_samples_leaf=5, min_samples_split=40
[CV 2/3; 290/600] END max_depth=5, max_features=None, min_samples_leaf=5, min_samples_split=40;, score=0.711 total time=   0.1s
[CV 3/3; 290/600] START max_depth=5, max_features=None, min_samples_leaf=5, min_samples_split=40
[CV 3/3; 290/600] END max_depth=5, max_features=None, min_samples_leaf=5, min_samples_split=40;, score=0.713 total time=   0.1s
[CV 1/3; 291/600] START max_depth=5, max_features=None, min_samples_leaf=10, min_samples_split=1
[CV 1/3; 291/600] END max_depth=5, max_features=None, min_samples_leaf=10, min_samples_split=1;, score=nan total time=   0.0s
[CV 2/3; 291/600] START max_depth=5, max_features=None, min_samples_leaf=10, min_samples_split=1
[CV 2/3; 291/600] END max_depth=5, max_features=None, min_samples_leaf=10, min_samples_split=1;, score

[CV 1/3; 303/600] END max_depth=7, max_features=5, min_samples_leaf=1, min_samples_split=10;, score=0.590 total time=   0.0s
[CV 2/3; 303/600] START max_depth=7, max_features=5, min_samples_leaf=1, min_samples_split=10
[CV 2/3; 303/600] END max_depth=7, max_features=5, min_samples_leaf=1, min_samples_split=10;, score=0.562 total time=   0.1s
[CV 3/3; 303/600] START max_depth=7, max_features=5, min_samples_leaf=1, min_samples_split=10
[CV 3/3; 303/600] END max_depth=7, max_features=5, min_samples_leaf=1, min_samples_split=10;, score=0.602 total time=   0.0s
[CV 1/3; 304/600] START max_depth=7, max_features=5, min_samples_leaf=1, min_samples_split=20
[CV 1/3; 304/600] END max_depth=7, max_features=5, min_samples_leaf=1, min_samples_split=20;, score=0.641 total time=   0.0s
[CV 2/3; 304/600] START max_depth=7, max_features=5, min_samples_leaf=1, min_samples_split=20
[CV 2/3; 304/600] END max_depth=7, max_features=5, min_samples_leaf=1, min_samples_split=20;, score=0.554 total time=   0.0s

[CV 2/3; 316/600] END max_depth=7, max_features=5, min_samples_leaf=20, min_samples_split=1;, score=nan total time=   0.0s
[CV 3/3; 316/600] START max_depth=7, max_features=5, min_samples_leaf=20, min_samples_split=1
[CV 3/3; 316/600] END max_depth=7, max_features=5, min_samples_leaf=20, min_samples_split=1;, score=nan total time=   0.0s
[CV 1/3; 317/600] START max_depth=7, max_features=5, min_samples_leaf=20, min_samples_split=5
[CV 1/3; 317/600] END max_depth=7, max_features=5, min_samples_leaf=20, min_samples_split=5;, score=0.599 total time=   0.0s
[CV 2/3; 317/600] START max_depth=7, max_features=5, min_samples_leaf=20, min_samples_split=5
[CV 2/3; 317/600] END max_depth=7, max_features=5, min_samples_leaf=20, min_samples_split=5;, score=0.566 total time=   0.0s
[CV 3/3; 317/600] START max_depth=7, max_features=5, min_samples_leaf=20, min_samples_split=5
[CV 3/3; 317/600] END max_depth=7, max_features=5, min_samples_leaf=20, min_samples_split=5;, score=0.576 total time=   0.0s
[CV

[CV 2/3; 329/600] END max_depth=7, max_features=10, min_samples_leaf=5, min_samples_split=20;, score=0.578 total time=   0.0s
[CV 3/3; 329/600] START max_depth=7, max_features=10, min_samples_leaf=5, min_samples_split=20
[CV 3/3; 329/600] END max_depth=7, max_features=10, min_samples_leaf=5, min_samples_split=20;, score=0.639 total time=   0.0s
[CV 1/3; 330/600] START max_depth=7, max_features=10, min_samples_leaf=5, min_samples_split=40
[CV 1/3; 330/600] END max_depth=7, max_features=10, min_samples_leaf=5, min_samples_split=40;, score=0.704 total time=   0.0s
[CV 2/3; 330/600] START max_depth=7, max_features=10, min_samples_leaf=5, min_samples_split=40
[CV 2/3; 330/600] END max_depth=7, max_features=10, min_samples_leaf=5, min_samples_split=40;, score=0.568 total time=   0.0s
[CV 3/3; 330/600] START max_depth=7, max_features=10, min_samples_leaf=5, min_samples_split=40
[CV 3/3; 330/600] END max_depth=7, max_features=10, min_samples_leaf=5, min_samples_split=40;, score=0.672 total tim

[CV 3/3; 342/600] END max_depth=7, max_features=50, min_samples_leaf=1, min_samples_split=5;, score=0.708 total time=   0.1s
[CV 1/3; 343/600] START max_depth=7, max_features=50, min_samples_leaf=1, min_samples_split=10
[CV 1/3; 343/600] END max_depth=7, max_features=50, min_samples_leaf=1, min_samples_split=10;, score=0.706 total time=   0.1s
[CV 2/3; 343/600] START max_depth=7, max_features=50, min_samples_leaf=1, min_samples_split=10
[CV 2/3; 343/600] END max_depth=7, max_features=50, min_samples_leaf=1, min_samples_split=10;, score=0.720 total time=   0.1s
[CV 3/3; 343/600] START max_depth=7, max_features=50, min_samples_leaf=1, min_samples_split=10
[CV 3/3; 343/600] END max_depth=7, max_features=50, min_samples_leaf=1, min_samples_split=10;, score=0.707 total time=   0.1s
[CV 1/3; 344/600] START max_depth=7, max_features=50, min_samples_leaf=1, min_samples_split=20
[CV 1/3; 344/600] END max_depth=7, max_features=50, min_samples_leaf=1, min_samples_split=20;, score=0.712 total time

[CV 3/3; 355/600] END max_depth=7, max_features=50, min_samples_leaf=10, min_samples_split=40;, score=0.706 total time=   0.1s
[CV 1/3; 356/600] START max_depth=7, max_features=50, min_samples_leaf=20, min_samples_split=1
[CV 1/3; 356/600] END max_depth=7, max_features=50, min_samples_leaf=20, min_samples_split=1;, score=nan total time=   0.0s
[CV 2/3; 356/600] START max_depth=7, max_features=50, min_samples_leaf=20, min_samples_split=1
[CV 2/3; 356/600] END max_depth=7, max_features=50, min_samples_leaf=20, min_samples_split=1;, score=nan total time=   0.0s
[CV 3/3; 356/600] START max_depth=7, max_features=50, min_samples_leaf=20, min_samples_split=1
[CV 3/3; 356/600] END max_depth=7, max_features=50, min_samples_leaf=20, min_samples_split=1;, score=nan total time=   0.0s
[CV 1/3; 357/600] START max_depth=7, max_features=50, min_samples_leaf=20, min_samples_split=5
[CV 1/3; 357/600] END max_depth=7, max_features=50, min_samples_leaf=20, min_samples_split=5;, score=0.708 total time=   

[CV 1/3; 368/600] END max_depth=7, max_features=100, min_samples_leaf=5, min_samples_split=10;, score=0.722 total time=   0.1s
[CV 2/3; 368/600] START max_depth=7, max_features=100, min_samples_leaf=5, min_samples_split=10
[CV 2/3; 368/600] END max_depth=7, max_features=100, min_samples_leaf=5, min_samples_split=10;, score=0.701 total time=   0.1s
[CV 3/3; 368/600] START max_depth=7, max_features=100, min_samples_leaf=5, min_samples_split=10
[CV 3/3; 368/600] END max_depth=7, max_features=100, min_samples_leaf=5, min_samples_split=10;, score=0.712 total time=   0.1s
[CV 1/3; 369/600] START max_depth=7, max_features=100, min_samples_leaf=5, min_samples_split=20
[CV 1/3; 369/600] END max_depth=7, max_features=100, min_samples_leaf=5, min_samples_split=20;, score=0.715 total time=   0.1s
[CV 2/3; 369/600] START max_depth=7, max_features=100, min_samples_leaf=5, min_samples_split=20
[CV 2/3; 369/600] END max_depth=7, max_features=100, min_samples_leaf=5, min_samples_split=20;, score=0.718 

[CV 1/3; 381/600] END max_depth=7, max_features=None, min_samples_leaf=1, min_samples_split=1;, score=nan total time=   0.0s
[CV 2/3; 381/600] START max_depth=7, max_features=None, min_samples_leaf=1, min_samples_split=1
[CV 2/3; 381/600] END max_depth=7, max_features=None, min_samples_leaf=1, min_samples_split=1;, score=nan total time=   0.0s
[CV 3/3; 381/600] START max_depth=7, max_features=None, min_samples_leaf=1, min_samples_split=1
[CV 3/3; 381/600] END max_depth=7, max_features=None, min_samples_leaf=1, min_samples_split=1;, score=nan total time=   0.0s
[CV 1/3; 382/600] START max_depth=7, max_features=None, min_samples_leaf=1, min_samples_split=5
[CV 1/3; 382/600] END max_depth=7, max_features=None, min_samples_leaf=1, min_samples_split=5;, score=0.718 total time=   0.2s
[CV 2/3; 382/600] START max_depth=7, max_features=None, min_samples_leaf=1, min_samples_split=5
[CV 2/3; 382/600] END max_depth=7, max_features=None, min_samples_leaf=1, min_samples_split=5;, score=0.724 total 

[CV 3/3; 393/600] END max_depth=7, max_features=None, min_samples_leaf=10, min_samples_split=10;, score=0.723 total time=   0.2s
[CV 1/3; 394/600] START max_depth=7, max_features=None, min_samples_leaf=10, min_samples_split=20
[CV 1/3; 394/600] END max_depth=7, max_features=None, min_samples_leaf=10, min_samples_split=20;, score=0.718 total time=   0.2s
[CV 2/3; 394/600] START max_depth=7, max_features=None, min_samples_leaf=10, min_samples_split=20
[CV 2/3; 394/600] END max_depth=7, max_features=None, min_samples_leaf=10, min_samples_split=20;, score=0.724 total time=   0.2s
[CV 3/3; 394/600] START max_depth=7, max_features=None, min_samples_leaf=10, min_samples_split=20
[CV 3/3; 394/600] END max_depth=7, max_features=None, min_samples_leaf=10, min_samples_split=20;, score=0.723 total time=   0.2s
[CV 1/3; 395/600] START max_depth=7, max_features=None, min_samples_leaf=10, min_samples_split=40
[CV 1/3; 395/600] END max_depth=7, max_features=None, min_samples_leaf=10, min_samples_split

[CV 1/3; 407/600] END max_depth=10, max_features=5, min_samples_leaf=5, min_samples_split=5;, score=0.607 total time=   0.0s
[CV 2/3; 407/600] START max_depth=10, max_features=5, min_samples_leaf=5, min_samples_split=5
[CV 2/3; 407/600] END max_depth=10, max_features=5, min_samples_leaf=5, min_samples_split=5;, score=0.650 total time=   0.0s
[CV 3/3; 407/600] START max_depth=10, max_features=5, min_samples_leaf=5, min_samples_split=5
[CV 3/3; 407/600] END max_depth=10, max_features=5, min_samples_leaf=5, min_samples_split=5;, score=0.620 total time=   0.0s
[CV 1/3; 408/600] START max_depth=10, max_features=5, min_samples_leaf=5, min_samples_split=10
[CV 1/3; 408/600] END max_depth=10, max_features=5, min_samples_leaf=5, min_samples_split=10;, score=0.643 total time=   0.0s
[CV 2/3; 408/600] START max_depth=10, max_features=5, min_samples_leaf=5, min_samples_split=10
[CV 2/3; 408/600] END max_depth=10, max_features=5, min_samples_leaf=5, min_samples_split=10;, score=0.651 total time=   

[CV 3/3; 419/600] END max_depth=10, max_features=5, min_samples_leaf=20, min_samples_split=20;, score=0.581 total time=   0.0s
[CV 1/3; 420/600] START max_depth=10, max_features=5, min_samples_leaf=20, min_samples_split=40
[CV 1/3; 420/600] END max_depth=10, max_features=5, min_samples_leaf=20, min_samples_split=40;, score=0.620 total time=   0.1s
[CV 2/3; 420/600] START max_depth=10, max_features=5, min_samples_leaf=20, min_samples_split=40
[CV 2/3; 420/600] END max_depth=10, max_features=5, min_samples_leaf=20, min_samples_split=40;, score=0.655 total time=   0.0s
[CV 3/3; 420/600] START max_depth=10, max_features=5, min_samples_leaf=20, min_samples_split=40
[CV 3/3; 420/600] END max_depth=10, max_features=5, min_samples_leaf=20, min_samples_split=40;, score=0.637 total time=   0.0s
[CV 1/3; 421/600] START max_depth=10, max_features=10, min_samples_leaf=1, min_samples_split=1
[CV 1/3; 421/600] END max_depth=10, max_features=10, min_samples_leaf=1, min_samples_split=1;, score=nan tota

[CV 1/3; 433/600] END max_depth=10, max_features=10, min_samples_leaf=10, min_samples_split=10;, score=0.674 total time=   0.1s
[CV 2/3; 433/600] START max_depth=10, max_features=10, min_samples_leaf=10, min_samples_split=10
[CV 2/3; 433/600] END max_depth=10, max_features=10, min_samples_leaf=10, min_samples_split=10;, score=0.647 total time=   0.1s
[CV 3/3; 433/600] START max_depth=10, max_features=10, min_samples_leaf=10, min_samples_split=10
[CV 3/3; 433/600] END max_depth=10, max_features=10, min_samples_leaf=10, min_samples_split=10;, score=0.633 total time=   0.1s
[CV 1/3; 434/600] START max_depth=10, max_features=10, min_samples_leaf=10, min_samples_split=20
[CV 1/3; 434/600] END max_depth=10, max_features=10, min_samples_leaf=10, min_samples_split=20;, score=0.665 total time=   0.1s
[CV 2/3; 434/600] START max_depth=10, max_features=10, min_samples_leaf=10, min_samples_split=20
[CV 2/3; 434/600] END max_depth=10, max_features=10, min_samples_leaf=10, min_samples_split=20;, sco

[CV 3/3; 445/600] END max_depth=10, max_features=50, min_samples_leaf=1, min_samples_split=40;, score=0.726 total time=   0.1s
[CV 1/3; 446/600] START max_depth=10, max_features=50, min_samples_leaf=5, min_samples_split=1
[CV 1/3; 446/600] END max_depth=10, max_features=50, min_samples_leaf=5, min_samples_split=1;, score=nan total time=   0.0s
[CV 2/3; 446/600] START max_depth=10, max_features=50, min_samples_leaf=5, min_samples_split=1
[CV 2/3; 446/600] END max_depth=10, max_features=50, min_samples_leaf=5, min_samples_split=1;, score=nan total time=   0.0s
[CV 3/3; 446/600] START max_depth=10, max_features=50, min_samples_leaf=5, min_samples_split=1
[CV 3/3; 446/600] END max_depth=10, max_features=50, min_samples_leaf=5, min_samples_split=1;, score=nan total time=   0.0s
[CV 1/3; 447/600] START max_depth=10, max_features=50, min_samples_leaf=5, min_samples_split=5
[CV 1/3; 447/600] END max_depth=10, max_features=50, min_samples_leaf=5, min_samples_split=5;, score=0.721 total time=   

[CV 3/3; 458/600] END max_depth=10, max_features=50, min_samples_leaf=20, min_samples_split=10;, score=0.721 total time=   0.1s
[CV 1/3; 459/600] START max_depth=10, max_features=50, min_samples_leaf=20, min_samples_split=20
[CV 1/3; 459/600] END max_depth=10, max_features=50, min_samples_leaf=20, min_samples_split=20;, score=0.724 total time=   0.1s
[CV 2/3; 459/600] START max_depth=10, max_features=50, min_samples_leaf=20, min_samples_split=20
[CV 2/3; 459/600] END max_depth=10, max_features=50, min_samples_leaf=20, min_samples_split=20;, score=0.721 total time=   0.1s
[CV 3/3; 459/600] START max_depth=10, max_features=50, min_samples_leaf=20, min_samples_split=20
[CV 3/3; 459/600] END max_depth=10, max_features=50, min_samples_leaf=20, min_samples_split=20;, score=0.729 total time=   0.1s
[CV 1/3; 460/600] START max_depth=10, max_features=50, min_samples_leaf=20, min_samples_split=40
[CV 1/3; 460/600] END max_depth=10, max_features=50, min_samples_leaf=20, min_samples_split=40;, sco

[CV 2/3; 472/600] END max_depth=10, max_features=100, min_samples_leaf=10, min_samples_split=5;, score=0.734 total time=   0.1s
[CV 3/3; 472/600] START max_depth=10, max_features=100, min_samples_leaf=10, min_samples_split=5
[CV 3/3; 472/600] END max_depth=10, max_features=100, min_samples_leaf=10, min_samples_split=5;, score=0.739 total time=   0.1s
[CV 1/3; 473/600] START max_depth=10, max_features=100, min_samples_leaf=10, min_samples_split=10
[CV 1/3; 473/600] END max_depth=10, max_features=100, min_samples_leaf=10, min_samples_split=10;, score=0.735 total time=   0.1s
[CV 2/3; 473/600] START max_depth=10, max_features=100, min_samples_leaf=10, min_samples_split=10
[CV 2/3; 473/600] END max_depth=10, max_features=100, min_samples_leaf=10, min_samples_split=10;, score=0.735 total time=   0.1s
[CV 3/3; 473/600] START max_depth=10, max_features=100, min_samples_leaf=10, min_samples_split=10
[CV 3/3; 473/600] END max_depth=10, max_features=100, min_samples_leaf=10, min_samples_split=10

[CV 3/3; 484/600] END max_depth=10, max_features=None, min_samples_leaf=1, min_samples_split=20;, score=0.741 total time=   0.2s
[CV 1/3; 485/600] START max_depth=10, max_features=None, min_samples_leaf=1, min_samples_split=40
[CV 1/3; 485/600] END max_depth=10, max_features=None, min_samples_leaf=1, min_samples_split=40;, score=0.734 total time=   0.2s
[CV 2/3; 485/600] START max_depth=10, max_features=None, min_samples_leaf=1, min_samples_split=40
[CV 2/3; 485/600] END max_depth=10, max_features=None, min_samples_leaf=1, min_samples_split=40;, score=0.731 total time=   0.2s
[CV 3/3; 485/600] START max_depth=10, max_features=None, min_samples_leaf=1, min_samples_split=40
[CV 3/3; 485/600] END max_depth=10, max_features=None, min_samples_leaf=1, min_samples_split=40;, score=0.742 total time=   0.2s
[CV 1/3; 486/600] START max_depth=10, max_features=None, min_samples_leaf=5, min_samples_split=1
[CV 1/3; 486/600] END max_depth=10, max_features=None, min_samples_leaf=5, min_samples_split=

[CV 1/3; 497/600] END max_depth=10, max_features=None, min_samples_leaf=20, min_samples_split=5;, score=0.732 total time=   0.2s
[CV 2/3; 497/600] START max_depth=10, max_features=None, min_samples_leaf=20, min_samples_split=5
[CV 2/3; 497/600] END max_depth=10, max_features=None, min_samples_leaf=20, min_samples_split=5;, score=0.732 total time=   0.2s
[CV 3/3; 497/600] START max_depth=10, max_features=None, min_samples_leaf=20, min_samples_split=5
[CV 3/3; 497/600] END max_depth=10, max_features=None, min_samples_leaf=20, min_samples_split=5;, score=0.738 total time=   0.2s
[CV 1/3; 498/600] START max_depth=10, max_features=None, min_samples_leaf=20, min_samples_split=10
[CV 1/3; 498/600] END max_depth=10, max_features=None, min_samples_leaf=20, min_samples_split=10;, score=0.732 total time=   0.2s
[CV 2/3; 498/600] START max_depth=10, max_features=None, min_samples_leaf=20, min_samples_split=10
[CV 2/3; 498/600] END max_depth=10, max_features=None, min_samples_leaf=20, min_samples_s

[CV 2/3; 509/600] END max_depth=None, max_features=5, min_samples_leaf=5, min_samples_split=20;, score=0.683 total time=   0.1s
[CV 3/3; 509/600] START max_depth=None, max_features=5, min_samples_leaf=5, min_samples_split=20
[CV 3/3; 509/600] END max_depth=None, max_features=5, min_samples_leaf=5, min_samples_split=20;, score=0.682 total time=   0.1s
[CV 1/3; 510/600] START max_depth=None, max_features=5, min_samples_leaf=5, min_samples_split=40
[CV 1/3; 510/600] END max_depth=None, max_features=5, min_samples_leaf=5, min_samples_split=40;, score=0.679 total time=   0.1s
[CV 2/3; 510/600] START max_depth=None, max_features=5, min_samples_leaf=5, min_samples_split=40
[CV 2/3; 510/600] END max_depth=None, max_features=5, min_samples_leaf=5, min_samples_split=40;, score=0.706 total time=   0.1s
[CV 3/3; 510/600] START max_depth=None, max_features=5, min_samples_leaf=5, min_samples_split=40
[CV 3/3; 510/600] END max_depth=None, max_features=5, min_samples_leaf=5, min_samples_split=40;, sco

[CV 3/3; 522/600] END max_depth=None, max_features=10, min_samples_leaf=1, min_samples_split=5;, score=0.743 total time=   0.1s
[CV 1/3; 523/600] START max_depth=None, max_features=10, min_samples_leaf=1, min_samples_split=10
[CV 1/3; 523/600] END max_depth=None, max_features=10, min_samples_leaf=1, min_samples_split=10;, score=0.738 total time=   0.1s
[CV 2/3; 523/600] START max_depth=None, max_features=10, min_samples_leaf=1, min_samples_split=10
[CV 2/3; 523/600] END max_depth=None, max_features=10, min_samples_leaf=1, min_samples_split=10;, score=0.746 total time=   0.1s
[CV 3/3; 523/600] START max_depth=None, max_features=10, min_samples_leaf=1, min_samples_split=10
[CV 3/3; 523/600] END max_depth=None, max_features=10, min_samples_leaf=1, min_samples_split=10;, score=0.750 total time=   0.1s
[CV 1/3; 524/600] START max_depth=None, max_features=10, min_samples_leaf=1, min_samples_split=20
[CV 1/3; 524/600] END max_depth=None, max_features=10, min_samples_leaf=1, min_samples_split=

[CV 1/3; 535/600] END max_depth=None, max_features=10, min_samples_leaf=10, min_samples_split=40;, score=0.692 total time=   0.1s
[CV 2/3; 535/600] START max_depth=None, max_features=10, min_samples_leaf=10, min_samples_split=40
[CV 2/3; 535/600] END max_depth=None, max_features=10, min_samples_leaf=10, min_samples_split=40;, score=0.733 total time=   0.1s
[CV 3/3; 535/600] START max_depth=None, max_features=10, min_samples_leaf=10, min_samples_split=40
[CV 3/3; 535/600] END max_depth=None, max_features=10, min_samples_leaf=10, min_samples_split=40;, score=0.715 total time=   0.1s
[CV 1/3; 536/600] START max_depth=None, max_features=10, min_samples_leaf=20, min_samples_split=1
[CV 1/3; 536/600] END max_depth=None, max_features=10, min_samples_leaf=20, min_samples_split=1;, score=nan total time=   0.0s
[CV 2/3; 536/600] START max_depth=None, max_features=10, min_samples_leaf=20, min_samples_split=1
[CV 2/3; 536/600] END max_depth=None, max_features=10, min_samples_leaf=20, min_samples_s

[CV 3/3; 547/600] END max_depth=None, max_features=50, min_samples_leaf=5, min_samples_split=5;, score=0.753 total time=   0.1s
[CV 1/3; 548/600] START max_depth=None, max_features=50, min_samples_leaf=5, min_samples_split=10
[CV 1/3; 548/600] END max_depth=None, max_features=50, min_samples_leaf=5, min_samples_split=10;, score=0.753 total time=   0.1s
[CV 2/3; 548/600] START max_depth=None, max_features=50, min_samples_leaf=5, min_samples_split=10
[CV 2/3; 548/600] END max_depth=None, max_features=50, min_samples_leaf=5, min_samples_split=10;, score=0.757 total time=   0.1s
[CV 3/3; 548/600] START max_depth=None, max_features=50, min_samples_leaf=5, min_samples_split=10
[CV 3/3; 548/600] END max_depth=None, max_features=50, min_samples_leaf=5, min_samples_split=10;, score=0.753 total time=   0.1s
[CV 1/3; 549/600] START max_depth=None, max_features=50, min_samples_leaf=5, min_samples_split=20
[CV 1/3; 549/600] END max_depth=None, max_features=50, min_samples_leaf=5, min_samples_split=

[CV 2/3; 560/600] END max_depth=None, max_features=50, min_samples_leaf=20, min_samples_split=40;, score=0.751 total time=   0.1s
[CV 3/3; 560/600] START max_depth=None, max_features=50, min_samples_leaf=20, min_samples_split=40
[CV 3/3; 560/600] END max_depth=None, max_features=50, min_samples_leaf=20, min_samples_split=40;, score=0.744 total time=   0.1s
[CV 1/3; 561/600] START max_depth=None, max_features=100, min_samples_leaf=1, min_samples_split=1
[CV 1/3; 561/600] END max_depth=None, max_features=100, min_samples_leaf=1, min_samples_split=1;, score=nan total time=   0.0s
[CV 2/3; 561/600] START max_depth=None, max_features=100, min_samples_leaf=1, min_samples_split=1
[CV 2/3; 561/600] END max_depth=None, max_features=100, min_samples_leaf=1, min_samples_split=1;, score=nan total time=   0.0s
[CV 3/3; 561/600] START max_depth=None, max_features=100, min_samples_leaf=1, min_samples_split=1
[CV 3/3; 561/600] END max_depth=None, max_features=100, min_samples_leaf=1, min_samples_split

[CV 3/3; 572/600] END max_depth=None, max_features=100, min_samples_leaf=10, min_samples_split=5;, score=0.758 total time=   0.2s
[CV 1/3; 573/600] START max_depth=None, max_features=100, min_samples_leaf=10, min_samples_split=10
[CV 1/3; 573/600] END max_depth=None, max_features=100, min_samples_leaf=10, min_samples_split=10;, score=0.756 total time=   0.2s
[CV 2/3; 573/600] START max_depth=None, max_features=100, min_samples_leaf=10, min_samples_split=10
[CV 2/3; 573/600] END max_depth=None, max_features=100, min_samples_leaf=10, min_samples_split=10;, score=0.757 total time=   0.2s
[CV 3/3; 573/600] START max_depth=None, max_features=100, min_samples_leaf=10, min_samples_split=10
[CV 3/3; 573/600] END max_depth=None, max_features=100, min_samples_leaf=10, min_samples_split=10;, score=0.756 total time=   0.2s
[CV 1/3; 574/600] START max_depth=None, max_features=100, min_samples_leaf=10, min_samples_split=20
[CV 1/3; 574/600] END max_depth=None, max_features=100, min_samples_leaf=10, 

[CV 3/3; 584/600] END max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=20;, score=0.756 total time=   0.4s
[CV 1/3; 585/600] START max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=40
[CV 1/3; 585/600] END max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=40;, score=0.757 total time=   0.4s
[CV 2/3; 585/600] START max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=40
[CV 2/3; 585/600] END max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=40;, score=0.759 total time=   0.4s
[CV 3/3; 585/600] START max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=40
[CV 3/3; 585/600] END max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=40;, score=0.756 total time=   0.4s
[CV 1/3; 586/600] START max_depth=None, max_features=None, min_samples_leaf=5, min_samples_split=1
[CV 1/3; 586/600] END max_depth=None, max_features=None, min_samples_leaf=5, 

[CV 1/3; 597/600] END max_depth=None, max_features=None, min_samples_leaf=20, min_samples_split=5;, score=0.751 total time=   0.4s
[CV 2/3; 597/600] START max_depth=None, max_features=None, min_samples_leaf=20, min_samples_split=5
[CV 2/3; 597/600] END max_depth=None, max_features=None, min_samples_leaf=20, min_samples_split=5;, score=0.760 total time=   0.3s
[CV 3/3; 597/600] START max_depth=None, max_features=None, min_samples_leaf=20, min_samples_split=5
[CV 3/3; 597/600] END max_depth=None, max_features=None, min_samples_leaf=20, min_samples_split=5;, score=0.748 total time=   0.3s
[CV 1/3; 598/600] START max_depth=None, max_features=None, min_samples_leaf=20, min_samples_split=10
[CV 1/3; 598/600] END max_depth=None, max_features=None, min_samples_leaf=20, min_samples_split=10;, score=0.751 total time=   0.4s
[CV 2/3; 598/600] START max_depth=None, max_features=None, min_samples_leaf=20, min_samples_split=10
[CV 2/3; 598/600] END max_depth=None, max_features=None, min_samples_leaf

360 fits failed out of a total of 1800.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
360 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/angeloturri/opt/anaconda3/envs/learn-env/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/angeloturri/opt/anaconda3/envs/learn-env/lib/python3.11/site-packages/sklearn/tree/_classes.py", line 889, in fit
    super().fit(
  File "/Users/angeloturri/opt/anaconda3/envs/learn-env/lib/python3.11/site-packages/sklearn/tree/_classes.py", line 177, in fit
    self._validate_params()
  File "/Users/angeloturri/opt/anaconda3/envs/learn-env/lib/python3.11/site

In [41]:
gs_dtc.best_params_

{'max_depth': None,
 'max_features': 50,
 'min_samples_leaf': 5,
 'min_samples_split': 20}

In [42]:
dtc = DecisionTreeClassifier(max_depth=5, max_features=100, min_samples_leaf=5, min_samples_split=10)
dtc.fit(X_train, y_train)

In [43]:
preds = dtc.predict(X_test)

In [44]:
report_columns = ['functional', 'functional needs repair', 
                  'non functional', 'accuracy', 'macro avg', 
                  'weighted avg']

report_rows = ['precision', 'recall', 
               'f1-score', 'support']

report = pd.DataFrame(classification_report(y_test, preds, output_dict=True), columns=report_columns, index=report_rows)
                      
matrix = pd.DataFrame(confusion_matrix(y_test, preds))

In [45]:
report

Unnamed: 0,functional,functional needs repair,non functional,accuracy,macro avg,weighted avg
precision,0.665686,0.0,0.860437,0.708822,0.508707,0.693149
recall,0.954687,0.0,0.494853,0.708822,0.48318,0.708822
f1-score,0.784414,0.0,0.628337,0.708822,0.470917,0.667977
support,8055.0,1064.0,5731.0,0.708822,14850.0,14850.0


In [46]:
matrix

Unnamed: 0,0,1,2
0,7690,0,365
1,969,0,95
2,2893,2,2836
