-
Notifications
You must be signed in to change notification settings - Fork 0
/
tree.py
63 lines (57 loc) · 2.24 KB
/
tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
""" Run tree models for binary classification and numerical features """
# Good base info
# https://sefiks.com/2018/08/27/a-step-by-step-cart-decision-tree-example/
# https://github.com/serengil/chefboost
# imports
import numpy as np
from chefboost import Chefboost as chef
from sklearn import metrics
# Run tree
# receives: features (numerical) and target (binary)
# returns: information and metrics of classification
def run_tree(X_train, X_test, y_train, y_test, algorithm, randomState=100):
    """Fit a chefboost decision tree and score it on a held-out test set.

    The training frame is built by attaching ``y_train`` (cast to ``str``) to
    ``X_train`` as a column named ``"Decision"``, which is the target column
    name chefboost requires. Each row of ``X_test`` is then predicted one at a
    time and the predictions are scored with scikit-learn metrics.

    Args:
        X_train: Training features; must provide ``.assign`` (e.g. a pandas
            DataFrame).
        X_test: Test features; rows are read positionally through ``.iloc``.
        y_train: Training target; must provide ``.astype(str).values``.
        y_test: Test target; must provide ``.tolist()``. Predictions are
            thresholded to the integers 0/1, so the metrics assume ``y_test``
            uses the same 0/1 encoding.
        algorithm: Passed through unchanged as the ``'algorithm'`` entry of the
            chefboost config.
        randomState: Currently unused by this function; kept so existing
            callers that pass it keep working.

    Returns:
        dict with the keys ``'y_true'``, ``'y_pred_categ_list'``,
        ``'y_pred_prob_list'``, ``'tn'``, ``'fp'``, ``'fn'``, ``'tp'``,
        ``'acc'``, ``'prec'``, ``'rec'`` and ``'auc'``.
    """
    y_true = y_test.tolist()

    # Join features and target; chefboost requires the target column to be
    # named "Decision".
    train_df = X_train.assign(Decision=y_train.astype(str).values)

    # max depth not working on 26/10/2021, due to library bug
    config = {
        'algorithm': algorithm,
        'enableParallelism': True,
        'max_depth': 7,
        'num_cores': 4,
    }
    model = chef.fit(train_df, config)

    # Predict row by row and convert each result to float.
    # NOTE(review): because "Decision" is trained as a string label, this is
    # presumably the predicted class ('0'/'1') rather than a calibrated
    # probability of the positive class -- confirm against chefboost.
    y_pred_prob_list = [
        float(chef.predict(model, X_test.iloc[row, :].values))
        for row in range(len(X_test))
    ]

    # Hard 0/1 labels from the scores.
    y_pred_categ_list = [1 if score > 0.5 else 0 for score in y_pred_prob_list]

    resultDict = {
        'y_true': y_true,
        'y_pred_categ_list': y_pred_categ_list,
        'y_pred_prob_list': y_pred_prob_list,
    }

    # Pin the label order so the matrix is always 2x2 and ravel() always
    # yields (tn, fp, fn, tp), even if only one class shows up in this fold.
    tn, fp, fn, tp = metrics.confusion_matrix(
        y_true, y_pred_categ_list, labels=[0, 1]
    ).ravel()
    resultDict['tn'] = tn
    resultDict['fp'] = fp
    resultDict['fn'] = fn
    resultDict['tp'] = tp

    resultDict['acc'] = metrics.accuracy_score(y_true, y_pred_categ_list)
    resultDict['prec'] = metrics.precision_score(y_true, y_pred_categ_list)
    resultDict['rec'] = metrics.recall_score(y_true, y_pred_categ_list)
    resultDict['auc'] = metrics.roc_auc_score(y_true, y_pred_prob_list)

    return resultDict