In [2]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

iris = load_iris()
X, y = iris['data'][:, 2:], iris['target']
X_train, X_test, y_train, y_test = train_test_split(X, y)

print(X.shape, y.shape)

(150, 2) (150,)


In [3]:
from sklearn.metrics import confusion_matrix

tree_clf = DecisionTreeClassifier(max_depth=2)
tree_clf.fit(X_train, y_train)

confusion_matrix(y_test, tree_clf.predict(X_test))

array([[15,  0,  0],
       [ 0, 11,  1],
       [ 0,  2,  9]])

In [6]:
from sklearn.tree import export_graphviz

export_graphviz(
  tree_clf,
  out_file='iris_tree.dot',
  feature_names=iris['feature_names'][2:],
  class_names=iris['target_names'],
  rounded=True,
  filled=True,
)

In [7]:
tree_clf.predict_proba(X_test)

array([[1.        , 0.        , 0.        ],
       [0.        , 0.        , 1.        ],
       [0.        , 0.        , 1.        ],
       [0.        , 0.        , 1.        ],
       [0.        , 0.92682927, 0.07317073],
       [1.        , 0.        , 0.        ],
       [0.        , 0.92682927, 0.07317073],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 0.92682927, 0.07317073],
       [1.        , 0.        , 0.        ],
       [0.        , 0.92682927, 0.07317073],
       [0.        , 0.        , 1.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 0.92682927, 0.07317073],
       [0.        , 0.92682927, 0.07317073],
       [0.        , 0.        , 1.        ],
       [0.        , 0.92682927, 0.07317073],
       [0.        , 0.        , 1.        ],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 0.92682927, 0.07317073],
       [0.

In [31]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

m = 1000

X = np.random.rand(m, 1)
y = ((5 * X ** 2) + (10 * X) + 5) + np.random.rand(m, 1)

print(X.shape, y.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

tree_reg = DecisionTreeRegressor(max_depth=4)
tree_reg.fit(X_train, y_train)

mean_squared_error(y_test, tree_reg.predict(X_test))



(1000, 1) (1000, 1)


0.17421707268200876

In [34]:
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=10000, noise=0.4)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [47]:
from sklearn.model_selection import GridSearchCV

params = {
  'max_leaf_nodes': [35, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50],
  'max_depth': [1, 5, 10, 20, 35, 50]
}

grid = GridSearchCV(DecisionTreeClassifier(), params, scoring='precision', cv=3)
grid.fit(X_train, y_train)
print(grid.best_params_)

{'max_depth': 10, 'max_leaf_nodes': 43}


In [48]:
from sklearn.metrics import precision_score, recall_score, confusion_matrix

clf = DecisionTreeClassifier(**grid.best_params_)
clf.fit(X_train, y_train)

pred = clf.predict(X_test)
print(precision_score(y_test, pred))
print(recall_score(y_test, pred))
print(confusion_matrix(y_test, pred))

0.8251953125
0.8693415637860082
[[849 179]
 [127 845]]


In [63]:
from sklearn.model_selection import ShuffleSplit
from sklearn.base import clone
X, y = make_moons(n_samples=int((100) / 0.8), noise=0.4)
rs = ShuffleSplit(n_splits=1000, test_size= 0.2)

clfs = []

for traini, testi in rs.split(X, y):
  X_train, X_test, y_train, y_test = X[traini], X[testi], y[traini], y[testi]
  clf = DecisionTreeClassifier(**grid.best_params_)
  clf.fit(X_train, y_train)
  print(precision_score(y_test, clf.predict(X_test)))
  clfs.append(clf)
  


1.0
0.8181818181818182
0.75
0.7058823529411765
0.9
0.7857142857142857
0.8333333333333334
0.8333333333333334
0.9230769230769231
0.8235294117647058
0.8571428571428571
0.75
0.6666666666666666
0.8461538461538461
0.9230769230769231
0.7692307692307693
0.8181818181818182
0.9166666666666666
0.9090909090909091
0.8571428571428571
0.8
0.7272727272727273
0.7857142857142857
0.7333333333333333
0.9285714285714286
0.8
0.75
0.6363636363636364
0.8125
0.8571428571428571
0.9166666666666666
0.75
0.875
0.8461538461538461
0.5454545454545454
1.0
1.0
0.7692307692307693
0.7777777777777778
0.9
0.6363636363636364
0.8333333333333334
0.6923076923076923
0.5
0.75
0.625
0.8181818181818182
0.9090909090909091
0.8
0.8333333333333334
0.8181818181818182
0.9230769230769231
0.8461538461538461
0.7272727272727273
1.0
0.6428571428571429
1.0
0.5714285714285714
0.6470588235294118
0.6
0.9333333333333333
0.8571428571428571
0.7857142857142857
1.0
0.7692307692307693
0.9
0.75
0.75
0.7857142857142857
0.7
0.7333333333333333
0.75
0.92857

In [75]:
from scipy.stats import mode

X, y = make_moons(n_samples=1000)

pred = mode([clfs[i].predict(X) for i in range(len(clfs))]).mode
print(precision_score(y, pred.ravel()))
print(recall_score(y, pred.ravel()))
print(confusion_matrix(y, pred.ravel()))

0.9403292181069959
0.914
[[471  29]
 [ 43 457]]
