# Decision Tree Classifier, Hyperparamètres et Botanique.
*src : https://simplonline.co/briefs/2f2ed1d0-9bd0-4e1b-9e4f-e7c7c2da6783*

*Importation des bibliothèques python et du dataset iris*

In [31]:
# Import the Python libraries used in this notebook
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn import tree


# Load the iris dataset
iris = load_iris()

*Préparation des données*

In [32]:
# Display the description of the iris dataset
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [33]:
# Split the iris dataset into features (X) and target (y)
# NOTE(review): the original comment named "petal length" and "petal width"; per the
# attribute order in iris.DESCR those are the last two columns (iris.data[:, 2:]).
# Confirm which pair of features was intended.
X = iris.data[:, :2]  # Keeps only the first two columns, i.e. "sepal length" and "sepal width"
y = iris.target

In [34]:
# Display the feature matrix and the target vector
print(X, y)

[[5.1 3.5]
 [4.9 3. ]
 [4.7 3.2]
 [4.6 3.1]
 [5.  3.6]
 [5.4 3.9]
 [4.6 3.4]
 [5.  3.4]
 [4.4 2.9]
 [4.9 3.1]
 [5.4 3.7]
 [4.8 3.4]
 [4.8 3. ]
 [4.3 3. ]
 [5.8 4. ]
 [5.7 4.4]
 [5.4 3.9]
 [5.1 3.5]
 [5.7 3.8]
 [5.1 3.8]
 [5.4 3.4]
 [5.1 3.7]
 [4.6 3.6]
 [5.1 3.3]
 [4.8 3.4]
 [5.  3. ]
 [5.  3.4]
 [5.2 3.5]
 [5.2 3.4]
 [4.7 3.2]
 [4.8 3.1]
 [5.4 3.4]
 [5.2 4.1]
 [5.5 4.2]
 [4.9 3.1]
 [5.  3.2]
 [5.5 3.5]
 [4.9 3.6]
 [4.4 3. ]
 [5.1 3.4]
 [5.  3.5]
 [4.5 2.3]
 [4.4 3.2]
 [5.  3.5]
 [5.1 3.8]
 [4.8 3. ]
 [5.1 3.8]
 [4.6 3.2]
 [5.3 3.7]
 [5.  3.3]
 [7.  3.2]
 [6.4 3.2]
 [6.9 3.1]
 [5.5 2.3]
 [6.5 2.8]
 [5.7 2.8]
 [6.3 3.3]
 [4.9 2.4]
 [6.6 2.9]
 [5.2 2.7]
 [5.  2. ]
 [5.9 3. ]
 [6.  2.2]
 [6.1 2.9]
 [5.6 2.9]
 [6.7 3.1]
 [5.6 3. ]
 [5.8 2.7]
 [6.2 2.2]
 [5.6 2.5]
 [5.9 3.2]
 [6.1 2.8]
 [6.3 2.5]
 [6.1 2.8]
 [6.4 2.9]
 [6.6 3. ]
 [6.8 2.8]
 [6.7 3. ]
 [6.  2.9]
 [5.7 2.6]
 [5.5 2.4]
 [5.5 2.4]
 [5.8 2.7]
 [6.  2.7]
 [5.4 3. ]
 [6.  3.4]
 [6.7 3.1]
 [6.3 2.3]
 [5.6 3. ]
 [5.5 2.5]
 [5.5 2.6]

*Manipulation des données*

In [39]:
# Split the iris data into a training set and a test set (fixed seed for reproducibility)
# NOTE(review): test_size=0.7 holds out 70% of the samples for testing, so the model is
# trained on the remaining 30% only — confirm this was not meant to be train_size=0.7.
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.7,random_state=42)

In [40]:
# Train and evaluate one decision tree per candidate maximum depth
depths = [3, 4, 10]
for depth in depths:
    # Build a fresh classifier capped at the current depth
    clf = tree.DecisionTreeClassifier(max_depth=depth)

    # Show which depth the following results belong to
    print("max depth = {}".format(depth))

    # Fit the model to the training data
    clf.fit(X_train, y_train)

    # Predict the classes of the test samples
    y_pred = clf.predict(X_test)
    print(y_pred)

    # Show the model accuracy. A bare expression is not echoed inside a loop,
    # so it has to be printed explicitly.
    accuracy = accuracy_score(y_test, y_pred)
    print("accuracy = {}".format(accuracy))

    # Show the confusion matrix
    print(confusion_matrix(y_test, y_pred))

    # Show the classification report
    print(classification_report(y_test, y_pred))

max depth = 3
[1 0 1 1 1 0 1 2 1 1 2 0 0 0 0 2 2 1 1 1 0 1 0 1 0 2 1 2 0 0 0 0 2 0 0 1 2
 0 0 0 1 0 2 0 0 1 1 2 2 0 1 2 1 0 1 1 0 0 0 0 1 0 0 1 1 0 1 0 0 1 1 0 1 1
 1 1 0 1 0 0 1 0 0 1 2 0 1 0 0 1 1 2 1 1 2 1 0 0 1 2 0 0 0 1 0]
[[39  1  0]
 [ 3 24  6]
 [ 5 17 10]]
              precision    recall  f1-score   support

           0       0.83      0.97      0.90        40
           1       0.57      0.73      0.64        33
           2       0.62      0.31      0.42        32

    accuracy                           0.70       105
   macro avg       0.68      0.67      0.65       105
weighted avg       0.69      0.70      0.67       105

max depth = 4
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 2 2 1 1 1 0 1 0 1 0 2 1 2 0 0 0 0 2 0 0 1 2
 0 0 0 1 0 2 0 0 1 1 2 2 0 1 2 1 0 2 1 0 0 0 0 1 0 0 1 1 0 1 0 0 1 1 0 1 1
 1 1 0 1 0 0 1 0 0 1 2 0 1 0 0 1 1 2 1 1 2 1 0 0 1 2 0 0 0 1 0]
[[39  1  0]
 [ 3 24  6]
 [ 5 15 12]]
              precision    recall  f1-score   support

           0       0.83      0.97 