# Classifying with Decision Trees, Random Forest and Gradient Boost

In [32]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate
from sklearn.tree import DecisionTreeClassifier

## Loading data (Iris-Data)

In [27]:
# Loading data and extract feature-data and target-data
iris_dict = load_iris(as_frame=True)
iris_features = iris_dict['data']
iris_target = iris_dict['target']

# Splitting into test and training data
X_train, X_test, y_train, y_test = train_test_split(iris_features, iris_target)

## Decision Tree Classifier

In [30]:
tree = DecisionTreeClassifier(max_depth=3)
tree.fit(X_train, y_train)

In [33]:
# Compare score on training data with score on test data, to check for overfitting
print(f'Accuracy on training data set:\t{tree.score(X_train, y_train):.0%}')
print(f'Accuracy on test data set:\t{tree.score(X_test, y_test):.0%}')
print('Result depends on chosen train/test-split. Crossvalidation is used to overcome this and\
tube hyperparameters.')

Accuracy on training data set:	98%
Accuracy on test data set:	97%
Result depends on chosen train/test-split. Crossvalidation is used to overcome this andtube hyperparameters.


In [38]:
# Generate train/test-split with approx. preserved distribution of target classes
splitter_strat = StratifiedKFold(10)
for i, split in enumerate(splitter_strat.split(iris_features, iris_target)):
    print(split)

(array([  5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,
        18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,
        31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,
        44,  45,  46,  47,  48,  49,  55,  56,  57,  58,  59,  60,  61,
        62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
        75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,
        88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 105,
       106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118,
       119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
       132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
       145, 146, 147, 148, 149]), array([  0,   1,   2,   3,   4,  50,  51,  52,  53,  54, 100, 101, 102,
       103, 104]))
(array([  0,   1,   2,   3,   4,  10,  11,  12,  13,  14,  15,  16,  17,
        18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,
        3