In [1]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score, precision_score, f1_score
import numpy as np
iris = load_iris()
iris_df = pd.DataFrame(data=np.c_[iris['data'], iris['target']],
                       columns=iris['feature_names'] + ['target'])
X = iris_df.drop('target', axis=1)
y = iris_df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
recall_scores = []
precision_scores = []
f1_scores = []
for depth in range(1, 6):
    clf = DecisionTreeClassifier(min_samples_leaf=2, min_samples_split=5, max_depth=depth)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    recall = recall_score(y_test, y_pred, average='weighted')
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted')
    recall_scores.append(recall)
    precision_scores.append(precision)
    f1_scores.append(f1)
best_recall_depth = np.argmax(recall_scores) + 1
print(f"Depth with the highest recall: {best_recall_depth}")
print("Reason: A higher depth might allow the tree to capture more complex patterns in the data, leading to better identification of positive cases and thus higher recall.")
lowest_precision_depth = np.argmin(precision_scores) + 1
print(f"Depth with the lowest precision: {lowest_precision_depth}")
print("Reason: A very deep tree might overfit the training data, leading to false positives and thus lower precision.")
best_f1_depth = np.argmax(f1_scores) + 1
print(f"Depth with the best F1 score: {best_f1_depth}")
print("Micro: Calculates metrics globally by counting the total true positives, false negatives, and false positives. It gives equal weight to each individual prediction.")
print("Macro: Calculates metrics for each class separately and then takes the unweighted average. It treats all classes equally, regardless of their prevalence.")
print("Weighted: Calculates metrics for each class separately and then takes the average, weighted by the number of samples in each class. It is useful when dealing with imbalanced datasets.")

Depth with the highest recall: 2
Reason: A higher depth might allow the tree to capture more complex patterns in the data, leading to better identification of positive cases and thus higher recall.
Depth with the lowest precision: 1
Reason: A very deep tree might overfit the training data, leading to false positives and thus lower precision.
Depth with the best F1 score: 2
Micro: Calculates metrics globally by counting the total true positives, false negatives, and false positives. It gives equal weight to each individual prediction.
Macro: Calculates metrics for each class separately and then takes the unweighted average. It treats all classes equally, regardless of their prevalence.
Weighted: Calculates metrics for each class separately and then takes the average, weighted by the number of samples in each class. It is useful when dealing with imbalanced datasets.
