In [1]:
from SemiRandomClassifier import SemiRandomDecisionTreeClassifier as src
from SemiRandomForestClassifier import SemiRandomForestClassifier as srf
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import export_text, plot_tree
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier

In [2]:
diabetes = load_breast_cancer()
X = diabetes['data']
y = diabetes['target']

In [3]:
X_train,X_test, y_train, y_test = train_test_split(X,y,test_size = 0.3, stratify = y)

## Decision Tree Output Comparion
From the following output, you can tell that both classifiers using split method 'best' are fairly simiar, the roc_auc_score based on probability estimate is fairly close, and the prediction outcome is the same.

In [4]:
classifier_benchmark = DecisionTreeClassifier(max_depth = 3,min_samples_leaf = 8)
classifier_benchmark.fit(X_train, y_train)
print(export_text(classifier_benchmark))

|--- feature_22 <= 106.10
|   |--- feature_27 <= 0.14
|   |   |--- feature_1 <= 21.56
|   |   |   |--- class: 1
|   |   |--- feature_1 >  21.56
|   |   |   |--- class: 1
|   |--- feature_27 >  0.14
|   |   |--- feature_21 <= 26.44
|   |   |   |--- class: 1
|   |   |--- feature_21 >  26.44
|   |   |   |--- class: 0
|--- feature_22 >  106.10
|   |--- feature_21 <= 20.88
|   |   |--- feature_15 <= 0.02
|   |   |   |--- class: 1
|   |   |--- feature_15 >  0.02
|   |   |   |--- class: 0
|   |--- feature_21 >  20.88
|   |   |--- feature_24 <= 0.11
|   |   |   |--- class: 0
|   |   |--- feature_24 >  0.11
|   |   |   |--- class: 0



In [5]:
my_classifier = src(MAX_DEPTH = 3, MIN_SAMPLES_LEAF = 8)
my_classifier.fit(X_train, y_train)

n: 398, score: 0.47
	 score_if_split: 0.14, split_col_idx: 22, split_value:106.0, max_depth: 3
n: 244, score: 0.09
	 score_if_split: 0.06, split_col_idx: 27, split_value:0.1342, max_depth: 2
n: 228, score: 0.03
	 score_if_split: 0.02, split_col_idx: 1, split_value:21.54, max_depth: 1
n: 190, score: 0.00, max_depth: 0
n: 38, score: 0.15, max_depth: 0
n: 16, score: 0.49
	 score_if_split: 0.11, split_col_idx: 21, split_value:26.38, max_depth: 1
n: 8, score: 0.22, max_depth: 0
n: 8, score: 0.00, max_depth: 0
n: 154, score: 0.21
	 score_if_split: 0.11, split_col_idx: 21, split_value:20.43, max_depth: 2
n: 18, score: 0.40
	 score_if_split: 0.21, split_col_idx: 15, split_value:0.0182, max_depth: 1
n: 10, score: 0.00, max_depth: 0
n: 8, score: 0.47, max_depth: 0
n: 136, score: 0.07
	 score_if_split: 0.05, split_col_idx: 24, split_value:0.1089, max_depth: 1
n: 9, score: 0.49, max_depth: 0
n: 127, score: 0.02, max_depth: 0


<SemiRandomClassifier.SemiRandomDecisionTreeClassifier at 0x1fa5a385070>

In [6]:
print('The roc_auc score of benchmark classifier is ' + str(roc_auc_score(y_test,classifier_benchmark.predict_proba(X_test)[:,1])) )
print('The roc_auc score of my classifier is '+ str(roc_auc_score( y_test, my_classifier.predict_probability(X_test)[:,1] ) )) 

The roc_auc score of benchmark classifier is 0.9462616822429906
The roc_auc score of my classifier is 0.9466997663551402


In [7]:
print('The accuracy score of benchmark classifier is ' + str(accuracy_score(y_test,classifier_benchmark.predict(X_test))) )
print('The accuracy score of my classifier is '+ str(accuracy_score( y_test, my_classifier.predict(X_test)))) 

The accuracy score of benchmark classifier is 0.9298245614035088
The accuracy score of my classifier is 0.9298245614035088


## Random Forest Performance
The following shows that my random forest classifier built upon my Decision Tree Classifier performs similar roc_auc and the same prediction.

In [30]:
benchmark_rf = RandomForestClassifier(n_estimators = 50, max_depth = 3,max_features = 'sqrt',max_samples = None)
benchmark_rf.fit(X_train, y_train)

RandomForestClassifier(max_depth=3, max_features='sqrt', n_estimators=50)

In [31]:
my_rf = srf(n_trees = 50, MAX_DEPTH  = 3, bootstrap_max_samples = None ,MAX_FEATURES = "sqrt")
my_rf.fit(X_train, y_train)

------ This is the 1th tree-----
n: 398, score: 0.46
	 score_if_split: 0.27, split_col_idx: 5, split_value:0.1021, max_depth: 3
n: 236, score: 0.18
	 score_if_split: 0.09, split_col_idx: 23, split_value:1032.0, max_depth: 2
n: 219, score: 0.08
	 score_if_split: 0.06, split_col_idx: 6, split_value:0.0739, max_depth: 1
n: 207, score: 0.04, max_depth: 0
n: 12, score: 0.49, max_depth: 0
n: 17, score: 0.21
	 score_if_split: 0.08, split_col_idx: 24, split_value:0.09862, max_depth: 1
n: 3, score: 0.44, max_depth: 0
n: 14, score: 0.00, max_depth: 0
n: 162, score: 0.40
	 score_if_split: 0.12, split_col_idx: 27, split_value:0.1342, max_depth: 2
n: 34, score: 0.00, max_depth: 1
n: 128, score: 0.16
	 score_if_split: 0.09, split_col_idx: 19, split_value:0.01008, max_depth: 1
n: 123, score: 0.09, max_depth: 0
n: 5, score: 0.00, max_depth: 0
------ This is the 2th tree-----
n: 398, score: 0.48
	 score_if_split: 0.16, split_col_idx: 20, split_value:16.76, max_depth: 3
n: 257, score: 0.18
	 score_if_sp

n: 299, score: 0.19
	 score_if_split: 0.13, split_col_idx: 6, split_value:0.101, max_depth: 2
n: 247, score: 0.05
	 score_if_split: 0.04, split_col_idx: 22, split_value:101.9, max_depth: 1
n: 223, score: 0.00, max_depth: 0
n: 24, score: 0.38, max_depth: 0
n: 52, score: 0.50
	 score_if_split: 0.37, split_col_idx: 14, split_value:0.008064, max_depth: 1
n: 28, score: 0.41, max_depth: 0
n: 24, score: 0.33, max_depth: 0
n: 99, score: 0.04
	 score_if_split: 0.02, split_col_idx: 24, split_value:0.09862, max_depth: 2
n: 1, score: 0.00, max_depth: 1
n: 98, score: 0.02
	 score_if_split: 0.00, split_col_idx: 22, split_value:109.8, max_depth: 1
n: 1, score: 0.00, max_depth: 0
n: 97, score: 0.00, max_depth: 0
------ This is the 12th tree-----
n: 398, score: 0.47
	 score_if_split: 0.17, split_col_idx: 20, split_value:15.48, max_depth: 3
n: 224, score: 0.08
	 score_if_split: 0.05, split_col_idx: 5, split_value:0.166, max_depth: 2
n: 217, score: 0.04
	 score_if_split: 0.02, split_col_idx: 29, split_va

n: 398, score: 0.46
	 score_if_split: 0.16, split_col_idx: 27, split_value:0.1465, max_depth: 3
n: 273, score: 0.18
	 score_if_split: 0.15, split_col_idx: 1, split_value:18.84, max_depth: 2
n: 180, score: 0.01
	 score_if_split: 0.00, split_col_idx: 27, split_value:0.1452, max_depth: 1
n: 179, score: 0.00, max_depth: 0
n: 1, score: 0.00, max_depth: 0
n: 93, score: 0.41
	 score_if_split: 0.17, split_col_idx: 20, split_value:16.76, max_depth: 1
n: 75, score: 0.21, max_depth: 0
n: 18, score: 0.00, max_depth: 0
n: 125, score: 0.12
	 score_if_split: 0.07, split_col_idx: 22, split_value:109.1, max_depth: 2
n: 19, score: 0.49
	 score_if_split: 0.18, split_col_idx: 19, split_value:0.01172, max_depth: 1
n: 13, score: 0.26, max_depth: 0
n: 6, score: 0.00, max_depth: 0
n: 106, score: 0.00, max_depth: 1
------ This is the 22th tree-----
n: 398, score: 0.47
	 score_if_split: 0.11, split_col_idx: 27, split_value:0.1423, max_depth: 3
n: 255, score: 0.10
	 score_if_split: 0.09, split_col_idx: 12, split

n: 398, score: 0.44
	 score_if_split: 0.18, split_col_idx: 0, split_value:15.04, max_depth: 3
n: 284, score: 0.18
	 score_if_split: 0.07, split_col_idx: 27, split_value:0.1571, max_depth: 2
n: 264, score: 0.07
	 score_if_split: 0.07, split_col_idx: 29, split_value:0.05504, max_depth: 1
n: 1, score: 0.00, max_depth: 0
n: 263, score: 0.07, max_depth: 0
n: 20, score: 0.10
	 score_if_split: 0.00, split_col_idx: 13, split_value:12.96, max_depth: 1
n: 1, score: 0.00, max_depth: 0
n: 19, score: 0.00, max_depth: 0
n: 114, score: 0.19
	 score_if_split: 0.07, split_col_idx: 27, split_value:0.1452, max_depth: 2
n: 18, score: 0.44
	 score_if_split: 0.19, split_col_idx: 18, split_value:0.01669, max_depth: 1
n: 12, score: 0.15, max_depth: 0
n: 6, score: 0.28, max_depth: 0
n: 96, score: 0.00, max_depth: 1
------ This is the 32th tree-----
n: 398, score: 0.47
	 score_if_split: 0.21, split_col_idx: 13, split_value:31.24, max_depth: 3
n: 246, score: 0.16
	 score_if_split: 0.13, split_col_idx: 5, split_v

n: 398, score: 0.45
	 score_if_split: 0.13, split_col_idx: 3, split_value:693.7, max_depth: 3
n: 278, score: 0.15
	 score_if_split: 0.09, split_col_idx: 23, split_value:783.6, max_depth: 2
n: 246, score: 0.04
	 score_if_split: 0.03, split_col_idx: 26, split_value:0.3662, max_depth: 1
n: 223, score: 0.00, max_depth: 0
n: 23, score: 0.34, max_depth: 0
n: 32, score: 0.50
	 score_if_split: 0.23, split_col_idx: 4, split_value:0.1031, max_depth: 1
n: 20, score: 0.38, max_depth: 0
n: 12, score: 0.00, max_depth: 0
n: 120, score: 0.11
	 score_if_split: 0.02, split_col_idx: 12, split_value:1.338, max_depth: 2
n: 6, score: 0.00, max_depth: 1
n: 114, score: 0.02
	 score_if_split: 0.01, split_col_idx: 28, split_value:0.1783, max_depth: 1
n: 4, score: 0.38, max_depth: 0
n: 110, score: 0.00, max_depth: 0
------ This is the 42th tree-----
n: 398, score: 0.48
	 score_if_split: 0.14, split_col_idx: 22, split_value:106.0, max_depth: 3
n: 229, score: 0.08
	 score_if_split: 0.04, split_col_idx: 27, split_v

<SemiRandomForestClassifier.SemiRandomForestClassifier at 0x1fa5ece61f0>

In [32]:
print('The roc_auc score of benchmark classifier is ' + str(roc_auc_score(y_test,benchmark_rf.predict_proba(X_test)[:,1])) )
print('The roc_auc score of my classifier is '+ str(roc_auc_score( y_test, my_rf.predict_probability(X_test)[:,1] ) )) 

The roc_auc score of benchmark classifier is 0.9897780373831776
The roc_auc score of my classifier is 0.9931366822429907


In [33]:
print('The accuracy score of benchmark classifier is ' + str(accuracy_score(y_test,benchmark_rf.predict(X_test))) )
print('The accuracy score of my classifier is '+ str(accuracy_score( y_test, my_rf.predict(X_test) ) )) 

The accuracy score of benchmark classifier is 0.9766081871345029
The accuracy score of my classifier is 0.9766081871345029


The following compares split method 'random' with ExtraTreesClassifier with all hyperparameter set the same

In [34]:
benchmark_extra_rf = ExtraTreesClassifier(n_estimators = 50, max_depth = 3, max_samples = None,max_features ='sqrt',bootstrap = False)
benchmark_extra_rf.fit(X_train, y_train)

ExtraTreesClassifier(max_depth=3, max_features='sqrt', n_estimators=50)

In [35]:
my_rf = srf(n_trees = 50, split = 'random', MAX_DEPTH = 3, MAX_FEATURES = 'sqrt', bootstrap_max_samples = None, bootstrap = False)
my_rf.fit(X_train, y_train)

------ This is the 1th tree-----
n: 398, score: 0.47
	 score_if_split: 0.33, split_col_idx: 22, split_value:87.78, max_depth: 3
n: 135, score: 0.01
	 score_if_split: 0.01, split_col_idx: 22, split_value:79.73, max_depth: 2
n: 71, score: 0.00, max_depth: 1
n: 64, score: 0.03
	 score_if_split: 0.03, split_col_idx: 26, split_value:0.1316, max_depth: 1
n: 30, score: 0.00, max_depth: 0
n: 34, score: 0.06, max_depth: 0
n: 263, score: 0.49
	 score_if_split: 0.25, split_col_idx: 22, split_value:124.1, max_depth: 2
n: 160, score: 0.40
	 score_if_split: 0.37, split_col_idx: 16, split_value:0.05371, max_depth: 1
n: 147, score: 0.37, max_depth: 0
n: 13, score: 0.43, max_depth: 0
n: 103, score: 0.02
	 score_if_split: 0.02, split_col_idx: 14, split_value:0.005288, max_depth: 1
n: 31, score: 0.06, max_depth: 0
n: 72, score: 0.00, max_depth: 0
------ This is the 2th tree-----
n: 398, score: 0.47
	 score_if_split: 0.17, split_col_idx: 23, split_value:764.0, max_depth: 3
n: 237, score: 0.10
	 score_if_s

n: 83, score: 0.39, max_depth: 0
n: 14, score: 0.24
	 score_if_split: 0.10, split_col_idx: 22, split_value:128.2, max_depth: 1
n: 3, score: 0.44, max_depth: 0
n: 11, score: 0.00, max_depth: 0
n: 115, score: 0.05
	 score_if_split: 0.05, split_col_idx: 13, split_value:67.78, max_depth: 2
n: 65, score: 0.09
	 score_if_split: 0.09, split_col_idx: 13, split_value:31.72, max_depth: 1
n: 22, score: 0.17, max_depth: 0
n: 43, score: 0.05, max_depth: 0
n: 50, score: 0.00, max_depth: 1
------ This is the 17th tree-----
n: 398, score: 0.47
	 score_if_split: 0.35, split_col_idx: 1, split_value:18.77, max_depth: 3
n: 201, score: 0.23
	 score_if_split: 0.20, split_col_idx: 27, split_value:0.0812, max_depth: 2
n: 91, score: 0.00, max_depth: 1
n: 110, score: 0.37
	 score_if_split: 0.32, split_col_idx: 7, split_value:0.03152, max_depth: 1
n: 33, score: 0.00, max_depth: 0
n: 77, score: 0.46, max_depth: 0
n: 197, score: 0.47
	 score_if_split: 0.31, split_col_idx: 2, split_value:77.87, max_depth: 2
n: 42, 

n: 114, score: 0.03
	 score_if_split: 0.03, split_col_idx: 10, split_value:0.3093, max_depth: 1
n: 24, score: 0.15, max_depth: 0
n: 90, score: 0.00, max_depth: 0
------ This is the 27th tree-----
n: 398, score: 0.47
	 score_if_split: 0.25, split_col_idx: 26, split_value:0.2085, max_depth: 3
n: 191, score: 0.05
	 score_if_split: 0.05, split_col_idx: 21, split_value:23.64, max_depth: 2
n: 109, score: 0.00, max_depth: 1
n: 82, score: 0.11
	 score_if_split: 0.09, split_col_idx: 22, split_value:102.5, max_depth: 1
n: 75, score: 0.05, max_depth: 0
n: 7, score: 0.49, max_depth: 0
n: 207, score: 0.43
	 score_if_split: 0.32, split_col_idx: 21, split_value:23.13, max_depth: 2
n: 53, score: 0.42
	 score_if_split: 0.16, split_col_idx: 13, split_value:23.31, max_depth: 1
n: 31, score: 0.00, max_depth: 0
n: 22, score: 0.40, max_depth: 0
n: 154, score: 0.29
	 score_if_split: 0.23, split_col_idx: 6, split_value:0.06574, max_depth: 1
n: 17, score: 0.46, max_depth: 0
n: 137, score: 0.21, max_depth: 0
--

n: 26, score: 0.45, max_depth: 0
n: 90, score: 0.04, max_depth: 0
------ This is the 43th tree-----
n: 398, score: 0.47
	 score_if_split: 0.23, split_col_idx: 3, split_value:748.9, max_depth: 3
n: 294, score: 0.28
	 score_if_split: 0.25, split_col_idx: 12, split_value:2.805, max_depth: 2
n: 244, score: 0.20
	 score_if_split: 0.17, split_col_idx: 7, split_value:0.02822, max_depth: 1
n: 148, score: 0.03, max_depth: 0
n: 96, score: 0.39, max_depth: 0
n: 50, score: 0.49
	 score_if_split: 0.42, split_col_idx: 24, split_value:0.1678, max_depth: 1
n: 43, score: 0.45, max_depth: 0
n: 7, score: 0.24, max_depth: 0
n: 104, score: 0.09
	 score_if_split: 0.09, split_col_idx: 3, split_value:840.4, max_depth: 2
n: 20, score: 0.26
	 score_if_split: 0.12, split_col_idx: 15, split_value:0.01515, max_depth: 1
n: 5, score: 0.48, max_depth: 0
n: 15, score: 0.00, max_depth: 0
n: 84, score: 0.05
	 score_if_split: 0.03, split_col_idx: 22, split_value:129.0, max_depth: 1
n: 6, score: 0.44, max_depth: 0
n: 78, 

<SemiRandomForestClassifier.SemiRandomForestClassifier at 0x1fa65890a30>

In [36]:
print('The roc_auc score of benchmark classifier is ' + str(roc_auc_score(y_test,benchmark_extra_rf.predict_proba(X_test)[:,1])) )
print('The roc_auc score of my classifier is '+ str(roc_auc_score( y_test, my_rf.predict_probability(X_test)[:,1] ) )) 

The roc_auc score of benchmark classifier is 0.9913843457943925
The roc_auc score of my classifier is 0.9916764018691588


In [37]:
print('The accuracy score of benchmark classifier is ' + str(accuracy_score(y_test,benchmark_extra_rf.predict(X_test))) )
print('The accuracy score of my classifier is '+ str(accuracy_score( y_test, my_rf.predict(X_test) ) )) 

The accuracy score of benchmark classifier is 0.9766081871345029
The accuracy score of my classifier is 0.9707602339181286


## New aplit method -- 'proportion_best', 'max_like_best'
The following shows the performance of another 2 split methods, these 2 methods lower the variance without including too much variance in splitting, thus the accuracy score can be higher than ExtraTreesClassifier in some cases, while in other cases, it can have lower variance and thus higher accuracy score than RandomForestClassifier.

In [38]:
my_rf_proportion = srf(n_trees = 50, split = 'proportion_best', MAX_DEPTH = 3, MAX_FEATURES = 'sqrt', bootstrap_max_samples = None, bootstrap = False)
my_rf_proportion.fit(X_train, y_train)

------ This is the 1th tree-----
n: 398, score: 0.47
	 score_if_split: 0.19, split_col_idx: 6, split_value:0.08777, max_depth: 3
n: 243, score: 0.14
	 score_if_split: 0.12, split_col_idx: 14, split_value:0.00329, max_depth: 2
n: 8, score: 0.47
	 score_if_split: 0.30, split_col_idx: 28, split_value:0.2994, max_depth: 1
n: 5, score: 0.48, max_depth: 0
n: 3, score: 0.00, max_depth: 0
n: 235, score: 0.10
	 score_if_split: 0.08, split_col_idx: 3, split_value:698.8, max_depth: 1
n: 221, score: 0.05, max_depth: 0
n: 14, score: 0.50, max_depth: 0
n: 155, score: 0.27
	 score_if_split: 0.13, split_col_idx: 23, split_value:764.0, max_depth: 2
n: 30, score: 0.42
	 score_if_split: 0.28, split_col_idx: 4, split_value:0.09699, max_depth: 1
n: 13, score: 0.00, max_depth: 0
n: 17, score: 0.50, max_depth: 0
n: 125, score: 0.06
	 score_if_split: 0.04, split_col_idx: 1, split_value:13.98, max_depth: 1
n: 3, score: 0.44, max_depth: 0
n: 122, score: 0.03, max_depth: 0
------ This is the 2th tree-----
n: 398

n: 198, score: 0.47
	 score_if_split: 0.30, split_col_idx: 5, split_value:0.1039, max_depth: 2
n: 93, score: 0.42
	 score_if_split: 0.16, split_col_idx: 3, split_value:698.8, max_depth: 1
n: 71, score: 0.18, max_depth: 0
n: 22, score: 0.09, max_depth: 0
n: 105, score: 0.19
	 score_if_split: 0.14, split_col_idx: 28, split_value:0.2398, max_depth: 1
n: 11, score: 0.50, max_depth: 0
n: 94, score: 0.10, max_depth: 0
------ This is the 11th tree-----
n: 398, score: 0.47
	 score_if_split: 0.16, split_col_idx: 23, split_value:867.1, max_depth: 3
n: 266, score: 0.17
	 score_if_split: 0.09, split_col_idx: 27, split_value:0.1561, max_depth: 2
n: 250, score: 0.08
	 score_if_split: 0.08, split_col_idx: 21, split_value:29.16, max_depth: 1
n: 207, score: 0.03, max_depth: 0
n: 43, score: 0.30, max_depth: 0
n: 16, score: 0.22
	 score_if_split: 0.12, split_col_idx: 0, split_value:9.731, max_depth: 1
n: 1, score: 0.00, max_depth: 0
n: 15, score: 0.12, max_depth: 0
n: 132, score: 0.13
	 score_if_split: 0

n: 114, score: 0.07, max_depth: 0
------ This is the 20th tree-----
n: 398, score: 0.47
	 score_if_split: 0.15, split_col_idx: 20, split_value:16.77, max_depth: 3
n: 264, score: 0.16
	 score_if_split: 0.12, split_col_idx: 22, split_value:106.0, max_depth: 2
n: 244, score: 0.09
	 score_if_split: 0.08, split_col_idx: 21, split_value:29.16, max_depth: 1
n: 201, score: 0.03, max_depth: 0
n: 43, score: 0.33, max_depth: 0
n: 20, score: 0.49
	 score_if_split: 0.29, split_col_idx: 17, split_value:0.01196, max_depth: 1
n: 5, score: 0.00, max_depth: 0
n: 15, score: 0.39, max_depth: 0
n: 134, score: 0.13
	 score_if_split: 0.09, split_col_idx: 27, split_value:0.1456, max_depth: 2
n: 28, score: 0.44
	 score_if_split: 0.38, split_col_idx: 28, split_value:0.2841, max_depth: 1
n: 18, score: 0.49, max_depth: 0
n: 10, score: 0.18, max_depth: 0
n: 106, score: 0.00, max_depth: 1
------ This is the 21th tree-----
n: 398, score: 0.47
	 score_if_split: 0.20, split_col_idx: 0, split_value:15.0, max_depth: 3
n

n: 228, score: 0.03
	 score_if_split: 0.02, split_col_idx: 14, split_value:0.00328, max_depth: 1
n: 3, score: 0.44, max_depth: 0
n: 225, score: 0.02, max_depth: 0
n: 16, score: 0.49
	 score_if_split: 0.36, split_col_idx: 24, split_value:0.1785, max_depth: 1
n: 12, score: 0.49, max_depth: 0
n: 4, score: 0.00, max_depth: 0
n: 154, score: 0.21
	 score_if_split: 0.16, split_col_idx: 26, split_value:0.221, max_depth: 2
n: 12, score: 0.44
	 score_if_split: 0.27, split_col_idx: 27, split_value:0.08235, max_depth: 1
n: 2, score: 0.00, max_depth: 0
n: 10, score: 0.32, max_depth: 0
n: 142, score: 0.13
	 score_if_split: 0.08, split_col_idx: 21, split_value:19.58, max_depth: 1
n: 11, score: 0.46, max_depth: 0
n: 131, score: 0.04, max_depth: 0
------ This is the 30th tree-----
n: 398, score: 0.47
	 score_if_split: 0.15, split_col_idx: 27, split_value:0.1427, max_depth: 3
n: 268, score: 0.17
	 score_if_split: 0.11, split_col_idx: 0, split_value:14.97, max_depth: 2
n: 241, score: 0.07
	 score_if_spli

	 score_if_split: 0.05, split_col_idx: 21, split_value:18.34, max_depth: 1
n: 5, score: 0.48, max_depth: 0
n: 124, score: 0.03, max_depth: 0
------ This is the 39th tree-----
n: 398, score: 0.47
	 score_if_split: 0.19, split_col_idx: 3, split_value:690.2, max_depth: 3
n: 275, score: 0.22
	 score_if_split: 0.17, split_col_idx: 24, split_value:0.146, max_depth: 2
n: 223, score: 0.10
	 score_if_split: 0.09, split_col_idx: 25, split_value:0.4061, max_depth: 1
n: 215, score: 0.07, max_depth: 0
n: 8, score: 0.50, max_depth: 0
n: 52, score: 0.49
	 score_if_split: 0.29, split_col_idx: 28, split_value:0.338, max_depth: 1
n: 31, score: 0.27, max_depth: 0
n: 21, score: 0.31, max_depth: 0
n: 123, score: 0.14
	 score_if_split: 0.08, split_col_idx: 13, split_value:21.83, max_depth: 2
n: 6, score: 0.28
	 score_if_split: 0.17, split_col_idx: 19, split_value:0.002256, max_depth: 1
n: 4, score: 0.00, max_depth: 0
n: 2, score: 0.50, max_depth: 0
n: 117, score: 0.07
	 score_if_split: 0.04, split_col_idx: 

n: 398, score: 0.47
	 score_if_split: 0.30, split_col_idx: 10, split_value:0.3857, max_depth: 3
n: 257, score: 0.26
	 score_if_split: 0.15, split_col_idx: 23, split_value:867.1, max_depth: 2
n: 227, score: 0.13
	 score_if_split: 0.09, split_col_idx: 6, split_value:0.1463, max_depth: 1
n: 216, score: 0.07, max_depth: 0
n: 11, score: 0.40, max_depth: 0
n: 30, score: 0.32
	 score_if_split: 0.22, split_col_idx: 9, split_value:0.05912, max_depth: 1
n: 13, score: 0.50, max_depth: 0
n: 17, score: 0.00, max_depth: 0
n: 141, score: 0.36
	 score_if_split: 0.16, split_col_idx: 3, split_value:674.5, max_depth: 2
n: 44, score: 0.42
	 score_if_split: 0.30, split_col_idx: 2, split_value:77.42, max_depth: 1
n: 18, score: 0.00, max_depth: 0
n: 26, score: 0.50, max_depth: 0
n: 97, score: 0.04
	 score_if_split: 0.02, split_col_idx: 6, split_value:0.0515, max_depth: 1
n: 5, score: 0.48, max_depth: 0
n: 92, score: 0.00, max_depth: 0
------ This is the 49th tree-----
n: 398, score: 0.47
	 score_if_split: 0.

<SemiRandomForestClassifier.SemiRandomForestClassifier at 0x1fa66955a90>

In [39]:
print('The roc_auc score of my classifier with proportion best is '+ str(roc_auc_score( y_test, my_rf_proportion.predict_probability(X_test)[:,1] ) )) 
print('The accuracy score of my classifier with proportion best is '+ str(accuracy_score( y_test, my_rf_proportion.predict(X_test) ) )) 

The roc_auc score of my classifier with proportion best is 0.9881717289719626
The accuracy score of my classifier with proportion best is 0.9766081871345029


In [40]:
my_rf_max_like = srf(n_trees = 50, split = 'max_like_best', MAX_DEPTH = 3, MAX_FEATURES = 'sqrt', bootstrap_max_samples = None, bootstrap = False)
my_rf_max_like.fit(X_train, y_train)

------ This is the 1th tree-----
n: 398, score: 0.47
	 score_if_split: 0.15, split_col_idx: 27, split_value:0.1452, max_depth: 3
n: 270, score: 0.17
	 score_if_split: 0.16, split_col_idx: 1, split_value:17.07, max_depth: 2
n: 121, score: 0.03
	 score_if_split: 0.03, split_col_idx: 25, split_value:0.2068, max_depth: 1
n: 78, score: 0.00, max_depth: 0
n: 43, score: 0.09, max_depth: 0
n: 149, score: 0.27
	 score_if_split: 0.26, split_col_idx: 29, split_value:0.06263, max_depth: 1
n: 11, score: 0.46, max_depth: 0
n: 138, score: 0.25, max_depth: 0
n: 128, score: 0.09
	 score_if_split: 0.09, split_col_idx: 12, split_value:7.561, max_depth: 2
n: 116, score: 0.10
	 score_if_split: 0.10, split_col_idx: 26, split_value:0.4185, max_depth: 1
n: 46, score: 0.04, max_depth: 0
n: 70, score: 0.13, max_depth: 0
n: 12, score: 0.00, max_depth: 1
------ This is the 2th tree-----
n: 398, score: 0.47
	 score_if_split: 0.44, split_col_idx: 5, split_value:0.05113, max_depth: 3
n: 43, score: 0.05
	 score_if_sp

n: 308, score: 0.48
	 score_if_split: 0.29, split_col_idx: 23, split_value:591.0, max_depth: 2
n: 125, score: 0.05
	 score_if_split: 0.04, split_col_idx: 4, split_value:0.1051, max_depth: 1
n: 95, score: 0.00, max_depth: 0
n: 30, score: 0.18, max_depth: 0
n: 183, score: 0.45
	 score_if_split: 0.36, split_col_idx: 10, split_value:0.6003, max_depth: 1
n: 132, score: 0.50, max_depth: 0
n: 51, score: 0.00, max_depth: 0
------ This is the 11th tree-----
n: 398, score: 0.47
	 score_if_split: 0.15, split_col_idx: 20, split_value:16.77, max_depth: 3
n: 264, score: 0.16
	 score_if_split: 0.11, split_col_idx: 25, split_value:0.3842, max_depth: 2
n: 243, score: 0.08
	 score_if_split: 0.07, split_col_idx: 0, split_value:13.9, max_depth: 1
n: 211, score: 0.03, max_depth: 0
n: 32, score: 0.34, max_depth: 0
n: 21, score: 0.47
	 score_if_split: 0.47, split_col_idx: 29, split_value:0.1034, max_depth: 1
n: 5, score: 0.48, max_depth: 0
n: 16, score: 0.47, max_depth: 0
n: 134, score: 0.13
	 score_if_split

n: 14, score: 0.00, max_depth: 1
------ This is the 20th tree-----
n: 398, score: 0.47
	 score_if_split: 0.45, split_col_idx: 8, split_value:0.2166, max_depth: 3
n: 366, score: 0.45
	 score_if_split: 0.41, split_col_idx: 4, split_value:0.08875, max_depth: 2
n: 121, score: 0.24
	 score_if_split: 0.17, split_col_idx: 23, split_value:680.6, max_depth: 1
n: 76, score: 0.00, max_depth: 0
n: 45, score: 0.47, max_depth: 0
n: 245, score: 0.49
	 score_if_split: 0.46, split_col_idx: 28, split_value:0.2848, max_depth: 1
n: 116, score: 0.42, max_depth: 0
n: 129, score: 0.49, max_depth: 0
n: 32, score: 0.45
	 score_if_split: 0.26, split_col_idx: 15, split_value:0.02839, max_depth: 2
n: 8, score: 0.22
	 score_if_split: 0.00, split_col_idx: 7, split_value:0.06556, max_depth: 1
n: 7, score: 0.00, max_depth: 0
n: 1, score: 0.00, max_depth: 0
n: 24, score: 0.28
	 score_if_split: 0.26, split_col_idx: 12, split_value:1.719, max_depth: 1
n: 2, score: 0.50, max_depth: 0
n: 22, score: 0.24, max_depth: 0
----

n: 44, score: 0.24
	 score_if_split: 0.23, split_col_idx: 5, split_value:0.07823, max_depth: 1
n: 34, score: 0.21, max_depth: 0
n: 10, score: 0.32, max_depth: 0
n: 160, score: 0.50
	 score_if_split: 0.46, split_col_idx: 1, split_value:21.9, max_depth: 1
n: 125, score: 0.49, max_depth: 0
n: 35, score: 0.35, max_depth: 0
n: 194, score: 0.44
	 score_if_split: 0.43, split_col_idx: 19, split_value:0.004452, max_depth: 2
n: 117, score: 0.40
	 score_if_split: 0.40, split_col_idx: 8, split_value:0.1717, max_depth: 1
n: 46, score: 0.31, max_depth: 0
n: 71, score: 0.45, max_depth: 0
n: 77, score: 0.48
	 score_if_split: 0.39, split_col_idx: 14, split_value:0.009538, max_depth: 1
n: 41, score: 0.49, max_depth: 0
n: 36, score: 0.28, max_depth: 0
------ This is the 30th tree-----
n: 398, score: 0.47
	 score_if_split: 0.47, split_col_idx: 11, split_value:0.9429, max_depth: 3
n: 136, score: 0.44
	 score_if_split: 0.35, split_col_idx: 27, split_value:0.07864, max_depth: 2
n: 39, score: 0.00, max_depth:

n: 398, score: 0.47
	 score_if_split: 0.15, split_col_idx: 27, split_value:0.1427, max_depth: 3
n: 268, score: 0.17
	 score_if_split: 0.13, split_col_idx: 27, split_value:0.1108, max_depth: 2
n: 225, score: 0.07
	 score_if_split: 0.04, split_col_idx: 23, split_value:873.2, max_depth: 1
n: 216, score: 0.02, max_depth: 0
n: 9, score: 0.44, max_depth: 0
n: 43, score: 0.48
	 score_if_split: 0.31, split_col_idx: 22, split_value:101.9, max_depth: 1
n: 18, score: 0.10, max_depth: 0
n: 25, score: 0.46, max_depth: 0
n: 130, score: 0.10
	 score_if_split: 0.10, split_col_idx: 11, split_value:1.156, max_depth: 2
n: 76, score: 0.10
	 score_if_split: 0.10, split_col_idx: 18, split_value:0.0155, max_depth: 1
n: 35, score: 0.16, max_depth: 0
n: 41, score: 0.05, max_depth: 0
n: 54, score: 0.10
	 score_if_split: 0.07, split_col_idx: 17, split_value:0.03441, max_depth: 1
n: 53, score: 0.07, max_depth: 0
n: 1, score: 0.00, max_depth: 0
------ This is the 40th tree-----
n: 398, score: 0.47
	 score_if_split

n: 120, score: 0.43
	 score_if_split: 0.34, split_col_idx: 16, split_value:0.01651, max_depth: 1
n: 38, score: 0.00, max_depth: 0
n: 82, score: 0.50, max_depth: 0
n: 30, score: 0.44
	 score_if_split: 0.11, split_col_idx: 23, split_value:450.0, max_depth: 1
n: 18, score: 0.00, max_depth: 0
n: 12, score: 0.28, max_depth: 0
------ This is the 49th tree-----
n: 398, score: 0.47
	 score_if_split: 0.37, split_col_idx: 3, split_value:1145.0, max_depth: 3
n: 356, score: 0.42
	 score_if_split: 0.30, split_col_idx: 13, split_value:24.72, max_depth: 2
n: 208, score: 0.17
	 score_if_split: 0.17, split_col_idx: 17, split_value:0.01184, max_depth: 1
n: 163, score: 0.13, max_depth: 0
n: 45, score: 0.32, max_depth: 0
n: 148, score: 0.49
	 score_if_split: 0.42, split_col_idx: 4, split_value:0.08875, max_depth: 1
n: 37, score: 0.39, max_depth: 0
n: 111, score: 0.43, max_depth: 0
n: 42, score: 0.00, max_depth: 2
------ This is the 50th tree-----
n: 398, score: 0.47
	 score_if_split: 0.40, split_col_idx: 

<SemiRandomForestClassifier.SemiRandomForestClassifier at 0x1fa66a68a60>

In [41]:
print('The roc_auc score of my classifier with proportion best is '+ str(roc_auc_score( y_test, my_rf_max_like.predict_probability(X_test)[:,1] ) )) 
print('The accuracy score of my classifier with proportion best is '+ str(accuracy_score( y_test, my_rf_max_like.predict(X_test) ) )) 

The roc_auc score of my classifier with proportion best is 0.9899240654205608
The accuracy score of my classifier with proportion best is 0.9707602339181286
