In [12]:
import numpy as np
import pandas as pd
import time

def _recentTeamStats(games, team, period, colToDrop):
    """Select the rows of `games` that are averaged to describe `team`.

    `games` is filtered to the rows whose ``Team_A`` equals `team`, ordered
    from newest to oldest by ``Date_A``. Only the first 24 columns are kept
    (positional slice), then `colToDrop` is removed.

    - Exactly one row: that single row is returned as-is.
    - Otherwise: the newest row is skipped and up to `period` following rows
      are returned (fewer when the team has not played that many games; an
      empty frame when there is no row at all).
    """
    played = games.loc[games.Team_A == team, :].sort_values(by=['Date_A'], ascending=False)
    if len(played) == 1:
        # NOTE(review): for Team_A this single row is the game being predicted,
        # so its own box score becomes the feature (possible label leak).
        # Kept unchanged for backward compatibility -- confirm it is intended.
        window = played.iloc[0:1, 0:24]
    else:
        # Position 0 is presumably the game being predicted, hence the skip.
        # iloc clips at the end of the frame, so short histories need no
        # special case.
        window = played.iloc[1:period + 1, 0:24]
    return window.reset_index(drop=True).drop(columns=colToDrop)


def featureExtraction(dfFile, dateStart='1000-01-01', dateEnd='2999-12-31', period=5, attriToDrop=None, featureEng=None):
    """Build the feature matrix X and label frame Y used for model training.

    Every game row is described by the ``Home/Away_A`` flag of the row, the
    column-wise mean of Team_A's recent games (``*_A`` columns) and the
    column-wise mean of Team_B's recent games (same columns renamed ``*_B``).

    @param dfFile: path / file-like object of the preprocessed CSV
                   (e.g. 'nba_preprocessed.csv'), or an already loaded
                   pandas.DataFrame
    @param dateStart, dateEnd: str in the format of 'YYYY-MM-DD'; inclusive
                   bounds applied to ``Date_A``
    @param period: int, maximum number of previous games that are averaged
    @param attriToDrop: list[str] of attribute names without the '_A' suffix
                   (e.g. ['PTS']); they are removed for both teams
    @param featureEng: currently unused; kept for interface compatibility
    @return X, Y: pandas.DataFrame. Y has a single column 'Label' copied from
                   ``W/L_A``. When no game falls inside the date window, X is
                   an empty DataFrame and Y has zero rows.
    """
    df = dfFile if isinstance(dfFile, pd.DataFrame) else pd.read_csv(dfFile)

    # Date selection
    df = df.loc[(df.Date_A >= dateStart) & (df.Date_A <= dateEnd), :].reset_index(drop=True)

    # Get label Y
    Y = df[['W/L_A']].rename(columns={'W/L_A': 'Label'})

    # Columns that are never features; the list does not depend on the row,
    # so it is built once instead of once per game.
    colToDrop = ['Home/Away_A', 'Team_A', 'Date_A', 'W/L_A', 'Score_A', 'Opponent_A']
    if attriToDrop:
        colToDrop += [x + '_A' for x in attriToDrop]

    # Get averaged attributes X, one single-row frame per game
    rows = []
    for _, row in df.iterrows():
        # Only games played up to (and including) the current date are visible
        df_sel = df.loc[df.Date_A <= row['Date_A'], :]

        X_A = _recentTeamStats(df_sel, row['Team_A'], period, colToDrop)
        X_B = _recentTeamStats(df_sel, row['Team_B'], period, colToDrop)

        # Team_B's history is stored in '*_A' columns of its own rows; rename to '*_B'
        X_B = X_B.rename(columns=lambda x: x[0:-2] + '_B')

        # X_single = [Home/Away_A + mean(X_A) + mean(X_B)]
        X_single = pd.DataFrame(data=pd.concat([X_A.mean(), X_B.mean()])).transpose()
        X_single = pd.concat([pd.DataFrame(data={'Home/Away_A': [row['Home/Away_A']]}), X_single], axis=1)
        rows.append(X_single)

    if not rows:
        # Nothing inside the date window: return empty frames instead of
        # failing on an unbound X.
        return pd.DataFrame(), Y

    # Single concatenation instead of growing X inside the loop
    X = pd.concat(rows, ignore_index=True)
    return X, Y

In [13]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from matplotlib import pyplot as plt
from sklearn.svm import SVC
import numpy as np
import pandas as pd
import operator
%matplotlib inline

def CrossValidationGridSearchNested(origin_df_X, origin_df_Y, num_trials, fold_num, est_classifcation, tuned_param, scoring):
    """Repeat a grid search + cross-validated scoring and keep the best trial.

    For every trial ``i`` two shuffled ``StratifiedKFold`` splitters are built
    (``random_state=i`` for the grid search, ``random_state=i+1`` for the
    scoring). When `tuned_param` is given, ``GridSearchCV`` is fit on the full
    data and its ``best_estimator_`` is scored with ``cross_val_score``;
    otherwise `est_classifcation` itself is scored.

    NOTE(review): despite the name this is not a strictly nested CV -- the
    grid search sees the complete data set before its best estimator is
    scored on the same data, so the returned score is optimistic. A strict
    version would pass the ``GridSearchCV`` object itself to
    ``cross_val_score``, at roughly ``fold_num`` times the cost.

    @param origin_df_X: features; pandas.DataFrame or array-like (n_samples, n_features)
    @param origin_df_Y: labels; pandas.DataFrame / Series or array-like, flattened to 1-D
    @param num_trials: int, number of repetitions with different shuffles
    @param fold_num: int, number of folds for both splitters
    @param est_classifcation: scikit-learn estimator to tune / score
    @param tuned_param: parameter grid for GridSearchCV; None or an empty
                        container skips the grid search
    @param scoring: scoring name passed to GridSearchCV / cross_val_score
    @return (max_score, best_estimator): best mean CV score over all trials
            and the estimator that produced it. The estimator is the fitted
            ``best_estimator_`` of the winning grid search, or the unmodified
            `est_classifcation` when no grid was given. With zero trials the
            result is ``(-inf, est_classifcation)``.
    """
    # np.asarray accepts DataFrames as well as the plain arrays that some
    # callers pass, whereas `.values` only exists on pandas objects.
    X_data = np.asarray(origin_df_X)
    Y_data = np.asarray(origin_df_Y).ravel()

    # -inf instead of -1 so that scorers with values below -1
    # (e.g. 'neg_log_loss') can still win the comparison.
    max_score = float('-inf')
    best_estimator = est_classifcation
    is_tuned_param_empty = tuned_param is None or len(tuned_param) == 0

    for i in range(num_trials):
        inner_cv = StratifiedKFold(n_splits=fold_num, random_state=i, shuffle=True)
        outer_cv = StratifiedKFold(n_splits=fold_num, random_state=i+1, shuffle=True)

        if is_tuned_param_empty:
            candidate = est_classifcation
        else:
            # Parameter search on the full data set
            clf = GridSearchCV(estimator=est_classifcation, param_grid=tuned_param, cv=inner_cv, scoring=scoring)
            clf.fit(X_data, Y_data)
            candidate = clf.best_estimator_

        # CV score of the candidate for this trial
        param_score = cross_val_score(candidate, X=X_data, y=Y_data, cv=outer_cv, scoring=scoring).mean()

        if param_score > max_score:
            max_score = param_score
            best_estimator = candidate

        progress = (i+1)/num_trials*100
        print(f'> progress = {progress}%')

    return (max_score, best_estimator)

# Execution

## -Feature Extraction

In [14]:
# Path of the preprocessed game log, relative to this notebook
dfFile = '../NBA_Ino_part/nba_preprocessed.csv'
# Inclusive 'YYYY-MM-DD' bounds applied to Date_A by featureExtraction
dateStart = '2017-10-01'
dateEnd = '2018-04-30'
# Alternative: same date window, keeping every attribute
# X, Y = featureExtraction(dfFile, dateStart, dateEnd)
# Build X / Y for the date window with the 'PTS' attribute removed for both teams
X, Y = featureExtraction(dfFile, attriToDrop=['PTS'], dateStart=dateStart, dateEnd=dateEnd)
# Alternative: no date restriction, default arguments
# X, Y = featureExtraction(dfFile)

In [6]:
# Display the extracted feature matrix
X

Unnamed: 0,Home/Away_A,FG%_A,FGM_A,FGA_A,3P%_A,3PM_A,3PA_A,FT%_A,FTM_A,FTA_A,...,FTM_B,FTA_B,REB_B,OREB_B,DREB_B,AST_B,STL_B,BLK_B,TOV_B,PF_B
0,0,0.468000,36.000000,77.000000,0.481000,13.000000,27.000000,0.917000,11.000000,12.000000,...,15.000000,16.000000,36.000000,6.000000,30.000000,28.000000,9.0,5.0,15.000000,18.0
1,1,0.468000,36.000000,77.000000,0.481000,13.000000,27.000000,0.917000,11.000000,12.000000,...,8.500000,11.500000,49.500000,12.500000,37.000000,19.500000,6.5,3.5,15.500000,19.0
2,1,0.451500,36.500000,81.000000,0.407000,11.500000,28.500000,0.758500,11.500000,16.000000,...,29.500000,36.500000,44.500000,11.500000,33.000000,23.000000,10.0,8.5,12.500000,19.0
3,0,0.463667,37.333333,80.666667,0.387333,10.333333,26.666667,0.766667,13.666667,18.333333,...,22.666667,31.666667,50.666667,8.333333,42.333333,16.666667,4.0,4.0,17.333333,18.0
4,0,0.443750,37.500000,85.250000,0.355000,9.750000,27.750000,0.725000,12.500000,17.500000,...,16.400000,19.200000,43.400000,9.800000,33.600000,19.200000,9.4,3.6,14.400000,22.4
5,0,0.447400,38.400000,86.400000,0.324000,8.600000,26.200000,0.727800,13.400000,18.600000,...,21.600000,29.000000,44.000000,11.200000,32.800000,22.000000,9.8,4.4,16.000000,22.6
6,0,0.454800,40.600000,89.600000,0.337800,8.200000,24.800000,0.717200,15.000000,20.600000,...,16.200000,22.800000,45.800000,12.800000,33.000000,21.000000,7.6,4.0,15.200000,21.2
7,1,0.450200,40.200000,89.600000,0.359000,9.800000,27.000000,0.739200,17.000000,22.800000,...,19.000000,23.400000,42.800000,9.400000,33.400000,22.800000,9.6,6.6,12.800000,22.6
8,1,0.459400,41.800000,91.200000,0.389400,11.400000,28.800000,0.713600,17.200000,24.000000,...,15.600000,19.200000,42.000000,7.600000,34.400000,19.000000,8.0,4.4,16.400000,22.8
9,1,0.472600,41.400000,87.400000,0.416000,11.600000,27.200000,0.705600,18.200000,26.000000,...,16.200000,20.000000,43.200000,9.800000,33.400000,32.200000,7.8,10.0,17.000000,18.4


In [15]:
# Feature matrix as a NumPy array (column names are dropped)
X_val = X.values
X_val

array([[ 0.    ,  0.468 , 36.    , ...,  5.    , 15.    , 18.    ],
       [ 1.    ,  0.468 , 36.    , ...,  3.5   , 15.5   , 19.    ],
       [ 1.    ,  0.4515, 36.5   , ...,  8.5   , 12.5   , 19.    ],
       ...,
       [ 0.    ,  0.418 , 35.8   , ...,  4.2   , 12.2   , 17.6   ],
       [ 1.    ,  0.421 , 36.4   , ...,  3.    , 12.    , 18.4   ],
       [ 1.    ,  0.4476, 39.2   , ...,  5.2   , 14.6   , 20.6   ]])

In [16]:
# First five labels; Y is a one-column DataFrame named 'Label'
Y.head(5)

Unnamed: 0,Label
0,0
1,1
2,0
3,0
4,1


In [17]:
# Labels as a NumPy array; still 2-D (one column) because Y is a DataFrame
Y_val = Y.values
Y_val[0:5]

array([[0],
       [1],
       [0],
       [0],
       [1]])

In [18]:
# Flatten the labels to a 1-D array (this overwrites the 2-D Y_val above)
Y_val = Y.values.ravel()
Y_val[0:5]

array([0, 1, 0, 0, 1])

## -Cross Validation Grid Search

### New Features

In [20]:
from sklearn.ensemble import RandomForestClassifier
from time import time

# Hyper-parameter grid handed to GridSearchCV inside CrossValidationGridSearchNested.
# tuned_max_features is only referenced by the disabled "max_features" entry below.
tuned_max_features = ['auto', 'sqrt', 'log2']
tuned_parameters = {
                     "n_estimators": range(100, 1001, 100), 
                     "criterion": ["gini", "entropy"],
                     "bootstrap": [True, False]
                     #"max_depth": [3, 5, 8, 9, 11],
                     #"max_features": tuned_max_features
                   }

# Number of random trials (each trial uses differently seeded CV splits)
NUM_TRIALS = 1

# Base estimator to tune: a random forest with default settings
rf = RandomForestClassifier()

# Run the search on the DataFrames X / Y with 10 folds, scored by ROC AUC, and time it
start = time()
(max_score, rf_best_estimator) = CrossValidationGridSearchNested(X, Y, NUM_TRIALS, 10, rf, tuned_parameters, 'roc_auc')
rf_best_parameter = rf_best_estimator.get_params()

# Report elapsed time, best score, and the winning estimator with its parameters
print("CrossValidationGridSearchNested of RandomForestClassifier wih NUM_TRIALS = %2.0d took %.2f seconds."%(NUM_TRIALS, (time() - start)))
print(f'\nmax_score = {max_score}\n')
print(f'\nbest_estimator = {rf_best_estimator}\n')
print(f'\nbest_parameter = {rf_best_parameter}\n')

> progress = 100.0%
CrossValidationGridSearchNested of RandomForestClassifier wih NUM_TRIALS =  1 took 2572.64 seconds.

max_score = 0.6669381158159289


best_estimator = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=1000, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)


best_parameter = {'bootstrap': True, 'class_weight': None, 'criterion': 'entropy', 'max_depth': None, 'max_features': 'auto', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 1000, 'n_jobs': 1, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}



### Old Features

In [10]:
from sklearn.ensemble import RandomForestClassifier
from time import time

# Hyper-parameter grid handed to GridSearchCV inside CrossValidationGridSearchNested.
# tuned_max_features is only referenced by the disabled "max_features" entry below.
tuned_max_features = ['auto', 'sqrt', 'log2']
tuned_parameters = {
                     "n_estimators": range(100, 1001, 100), 
                     "criterion": ["gini", "entropy"],
                     "bootstrap": [True, False]
                     #"max_depth": [3, 5, 8, 9, 11],
                     #"max_features": tuned_max_features
                   }

# Number of random trials (each trial uses differently seeded CV splits)
NUM_TRIALS = 1

# Base estimator to tune: a random forest with default settings
rf = RandomForestClassifier()

# Run the search on the NumPy arrays X_val / Y_val with 10 folds, scored by ROC AUC, and time it.
# NOTE(review): this cell's execution count is lower than that of the feature-extraction
# cells above, so the recorded output presumably comes from a different ("old") X_val -- confirm.
start = time()
(max_score, rf_best_estimator) = CrossValidationGridSearchNested(X_val, Y_val.ravel(), NUM_TRIALS, 10, rf, tuned_parameters, 'roc_auc')
rf_best_parameter = rf_best_estimator.get_params()

# Report elapsed time, best score, and the winning estimator with its parameters
print("CrossValidationGridSearchNested of RandomForestClassifier wih NUM_TRIALS = %2.0d took %.2f seconds."%(NUM_TRIALS, (time() - start)))
print(f'\nmax_score = {max_score}\n')
print(f'\nbest_estimator = {rf_best_estimator}\n')
print(f'\nbest_parameter = {rf_best_parameter}\n')

> progress = 100.0%
CrossValidationGridSearchNested of RandomForestClassifier wih NUM_TRIALS =  1 took 1465.94 seconds.

max_score = 0.969396523233525


best_estimator = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=900, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)


best_parameter = {'bootstrap': True, 'class_weight': None, 'criterion': 'entropy', 'max_depth': None, 'max_features': 'auto', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 900, 'n_jobs': 1, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}



In [12]:
# Hyper-parameter grid handed to GridSearchCV inside CrossValidationGridSearchNested;
# compared with the previous cell, "max_features" is now part of the grid.
# This cell has no imports of its own: RandomForestClassifier and time() must already be in scope.
tuned_max_features = ['auto', 'sqrt', 'log2']
tuned_parameters = {
                     "n_estimators": range(100, 1001, 100), 
                     "criterion": ["gini", "entropy"],
                     "bootstrap": [True, False],
                     #"max_depth": [3, 5, 8, 9, 11],
                     "max_features": tuned_max_features
                   }

# Number of random trials (each trial uses differently seeded CV splits)
NUM_TRIALS = 1

# Base estimator to tune: a random forest with default settings
rf = RandomForestClassifier()

# Run the search on the NumPy arrays X_val / Y_val with 10 folds, scored by ROC AUC, and time it.
# NOTE(review): executed before the feature-extraction cells above according to the
# execution counts; the recorded output presumably belongs to the "old" features -- confirm.
start = time()
(max_score, rf_best_estimator) = CrossValidationGridSearchNested(X_val, Y_val.ravel(), NUM_TRIALS, 10, rf, tuned_parameters, 'roc_auc')
rf_best_parameter = rf_best_estimator.get_params()

# Report elapsed time, best score, and the winning estimator with its parameters
print("CrossValidationGridSearchNested of RandomForestClassifier wih NUM_TRIALS = %2.0d took %.2f seconds."%(NUM_TRIALS, (time() - start)))
print(f'\nmax_score = {max_score}\n')
print(f'\nbest_estimator = {rf_best_estimator}\n')
print(f'\nbest_parameter = {rf_best_parameter}\n')

> progress = 100.0%
CrossValidationGridSearchNested of RandomForestClassifier wih NUM_TRIALS =  1 took 4329.53 seconds.

max_score = 0.9684678432150176


best_estimator = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=None, max_features='log2', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=900, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)


best_parameter = {'bootstrap': True, 'class_weight': None, 'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 900, 'n_jobs': 1, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}



In [13]:
# Hyper-parameter grid handed to GridSearchCV inside CrossValidationGridSearchNested;
# this is the full grid, including "max_depth" and "max_features".
# This cell has no imports of its own: RandomForestClassifier and time() must already be in scope.
tuned_max_features = ['auto', 'sqrt', 'log2']
tuned_parameters = {
                     "n_estimators": range(100, 1001, 100), 
                     "criterion": ["gini", "entropy"],
                     "bootstrap": [True, False],
                     "max_depth": [3, 5, 8, 9, 11],
                     "max_features": tuned_max_features
                   }

# Number of random trials (each trial uses differently seeded CV splits)
NUM_TRIALS = 1

# Base estimator to tune: a random forest with default settings
rf = RandomForestClassifier()

# Run the search on the NumPy arrays X_val / Y_val with 10 folds, scored by ROC AUC, and time it.
# NOTE(review): executed before the feature-extraction cells above according to the
# execution counts; the recorded output presumably belongs to the "old" features -- confirm.
start = time()
(max_score, rf_best_estimator) = CrossValidationGridSearchNested(X_val, Y_val.ravel(), NUM_TRIALS, 10, rf, tuned_parameters, 'roc_auc')
rf_best_parameter = rf_best_estimator.get_params()

# Report elapsed time, best score, and the winning estimator with its parameters
print("CrossValidationGridSearchNested of RandomForestClassifier wih NUM_TRIALS = %2.0d took %.2f seconds."%(NUM_TRIALS, (time() - start)))
print(f'\nmax_score = {max_score}\n')
print(f'\nbest_estimator = {rf_best_estimator}\n')
print(f'\nbest_parameter = {rf_best_parameter}\n')

> progress = 100.0%
CrossValidationGridSearchNested of RandomForestClassifier wih NUM_TRIALS =  1 took 19531.87 seconds.

max_score = 0.9677308480401876


best_estimator = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=11, max_features='sqrt', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=800, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)


best_parameter = {'bootstrap': True, 'class_weight': None, 'criterion': 'entropy', 'max_depth': 11, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 800, 'n_jobs': 1, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}



In [10]:
from sklearn.ensemble import RandomForestClassifier
from time import time
# Hyper-parameter grid handed to GridSearchCV inside CrossValidationGridSearchNested
# (full grid, including "max_depth" and "max_features")
tuned_max_features = ['auto', 'sqrt', 'log2']
tuned_parameters = {
                     "n_estimators": range(100, 1001, 100), 
                     "criterion": ["gini", "entropy"],
                     "bootstrap": [True, False],
                     "max_depth": [3, 5, 8, 9, 11],
                     "max_features": tuned_max_features
                   }

# Number of random trials (each trial uses differently seeded CV splits);
# the whole grid search is repeated once per trial
NUM_TRIALS = 10

# Base estimator to tune: a random forest with default settings
rf = RandomForestClassifier()

# Run the search on the NumPy arrays X_val / Y_val with 10 folds, scored by ROC AUC, and time it.
# NOTE(review): executed before the feature-extraction cells above according to the
# execution counts; the recorded output presumably belongs to the "old" features -- confirm.
start = time()
(max_score, rf_best_estimator) = CrossValidationGridSearchNested(X_val, Y_val.ravel(), NUM_TRIALS, 10, rf, tuned_parameters, 'roc_auc')
rf_best_parameter = rf_best_estimator.get_params()

# Report elapsed time, best score, and the winning estimator with its parameters
print("CrossValidationGridSearchNested of RandomForestClassifier wih NUM_TRIALS = %2.0d took %.2f seconds."%(NUM_TRIALS, (time() - start)))
print(f'\nmax_score = {max_score}\n')
print(f'\nbest_estimator = {rf_best_estimator}\n')
print(f'\nbest_parameter = {rf_best_parameter}\n')

> progress = 10.0%
> progress = 20.0%
> progress = 30.0%
> progress = 40.0%
> progress = 50.0%
> progress = 60.0%
> progress = 70.0%
> progress = 80.0%
> progress = 90.0%
> progress = 100.0%
CrossValidationGridSearchNested of RandomForestClassifier wih NUM_TRIALS = 10 took 452298.04 seconds.

max_score = 0.9681935355938925


best_estimator = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=11, max_features='sqrt', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=700, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)


best_parameter = {'bootstrap': True, 'class_weight': None, 'criterion': 'entropy', 'max_depth': 11, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'm

### New Features

In [24]:
# Hyper-parameter grid handed to GridSearchCV inside CrossValidationGridSearchNested
# (full grid, including "max_depth" and "max_features").
# This cell has no imports of its own: RandomForestClassifier and time() must already be in scope.
tuned_max_features = ['auto', 'sqrt', 'log2']
tuned_parameters = {
                     "n_estimators": range(100, 1001, 100), 
                     "criterion": ["gini", "entropy"],
                     "bootstrap": [True, False],
                     "max_depth": [3, 5, 8, 9, 11],
                     "max_features": tuned_max_features
                   }

# Number of random trials (each trial uses differently seeded CV splits)
NUM_TRIALS = 1

# Base estimator to tune: a random forest with default settings
rf = RandomForestClassifier()

# Run the search on the DataFrames X / Y with 10 folds, scored by ROC AUC, and time it
start = time()
(max_score, rf_best_estimator) = CrossValidationGridSearchNested(X, Y, NUM_TRIALS, 10, rf, tuned_parameters, 'roc_auc')
rf_best_parameter = rf_best_estimator.get_params()

# Report elapsed time, best score, and the winning estimator with its parameters
print("CrossValidationGridSearchNested of RandomForestClassifier wih NUM_TRIALS = %2.0d took %.2f seconds."%(NUM_TRIALS, (time() - start)))
print(f'\nmax_score = {max_score}\n')
print(f'\nbest_estimator = {rf_best_estimator}\n')
print(f'\nbest_parameter = {rf_best_parameter}\n')

> progress = 100.0%
CrossValidationGridSearchNested of RandomForestClassifier wih NUM_TRIALS =  1 took 18430.59 seconds.

max_score = 0.6643351480428547


best_estimator = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=8, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=200, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)


best_parameter = {'bootstrap': True, 'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_features': 'auto', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 200, 'n_jobs': 1, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}



In [25]:
from sklearn.ensemble import RandomForestClassifier
from time import time
# Hyper-parameter grid handed to GridSearchCV inside CrossValidationGridSearchNested
# (full grid, including "max_depth" and "max_features")
tuned_max_features = ['auto', 'sqrt', 'log2']
tuned_parameters = {
                     "n_estimators": range(100, 1001, 100), 
                     "criterion": ["gini", "entropy"],
                     "bootstrap": [True, False],
                     "max_depth": [3, 5, 8, 9, 11],
                     "max_features": tuned_max_features
                   }

# Number of random trials (each trial uses differently seeded CV splits);
# the whole grid search is repeated once per trial
NUM_TRIALS = 10

# Base estimator to tune: a random forest with default settings
rf = RandomForestClassifier()

# Run the search on the DataFrames X / Y with 10 folds, scored by ROC AUC, and time it
start = time()
(max_score, rf_best_estimator) = CrossValidationGridSearchNested(X, Y, NUM_TRIALS, 10, rf, tuned_parameters, 'roc_auc')
rf_best_parameter = rf_best_estimator.get_params()

# Report elapsed time, best score, and the winning estimator with its parameters
print("CrossValidationGridSearchNested of RandomForestClassifier wih NUM_TRIALS = %2.0d took %.2f seconds."%(NUM_TRIALS, (time() - start)))
print(f'\nmax_score = {max_score}\n')
print(f'\nbest_estimator = {rf_best_estimator}\n')
print(f'\nbest_parameter = {rf_best_parameter}\n')

> progress = 10.0%
> progress = 20.0%
> progress = 30.0%
> progress = 40.0%
> progress = 50.0%
> progress = 60.0%
> progress = 70.0%
> progress = 80.0%
> progress = 90.0%
> progress = 100.0%
CrossValidationGridSearchNested of RandomForestClassifier wih NUM_TRIALS = 10 took 184125.37 seconds.

max_score = 0.6682583088188638


best_estimator = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=8, max_features='log2', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=800, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)


best_parameter = {'bootstrap': True, 'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_features': 'log2', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_sampl