In [45]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load the breast cancer dataset: features as a named-column frame, target as a series
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Split the dataset, holding out 20% of the rows for testing (seeded for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Baseline decision tree without any restrictions; fit() hands back the estimator itself
tree_standard = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_standard = tree_standard.predict(X_test)

In [47]:
# Evaluating the results of the unrestricted tree on the held-out split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# The confusion matrix and the classification report render as multi-line text.
# They must start right after the newline: a space in front of them shifts only
# their first line and breaks the column alignment with the lines below it.
print(f'Accuracy : {accuracy_score(y_test, y_pred_standard)}')
print(f'Confusion Matrix :\n{confusion_matrix(y_test, y_pred_standard)}\n')
print(f'Classification Report :\n{classification_report(y_test, y_pred_standard, target_names = data.target_names)}')

Accuracy : 0.9473684210526315
Confusion Matrix : 
 [[40  3]
 [ 3 68]] 

Classification Report : 
               precision    recall  f1-score   support

   malignant       0.93      0.93      0.93        43
      benign       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114



In [55]:
# Making decision tree models with restrictions

# Variant limited with max_depth = 3
tree_max_depth = DecisionTreeClassifier(max_depth=3, random_state=42).fit(X_train, y_train)
y_pred_max_depth = tree_max_depth.predict(X_test)

# Variant limited with min_samples_leaf = 5
tree_min_sample = DecisionTreeClassifier(min_samples_leaf=5, random_state=42).fit(X_train, y_train)
y_pred_min_sample = tree_min_sample.predict(X_test)

In [57]:
def evaluate_model(name, y_true, y_pred, target_names=None):
    """Print the accuracy and the classification report for one set of predictions.

    Parameters
    ----------
    name : str
        Label printed as the first line of the report.
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels that are compared against ``y_true``.
    target_names : sequence of str, optional
        Display names for the classes in the classification report. When
        omitted, the notebook-level ``data.target_names`` is used, so existing
        three-argument calls behave exactly as before.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    if target_names is None:
        # Default to the class names of the dataset loaded by the notebook.
        target_names = data.target_names

    print(name)
    print("Accuracy : ", accuracy_score(y_true, y_pred))
    print(classification_report(y_true, y_pred, target_names = target_names))
    # Separator line so consecutive reports are easy to tell apart.
    print("-" * 55)

# Report each tree in turn against the same held-out labels
for label, predictions in (
    ("Tree A (Standard)", y_pred_standard),
    ("Tree B (Max_depth = 3)", y_pred_max_depth),
    ("Tree C (Min_samples_leaf = 5)", y_pred_min_sample),
):
    evaluate_model(label, y_test, predictions)

Tree A (Standard)
Accuracy :  0.9473684210526315
              precision    recall  f1-score   support

   malignant       0.93      0.93      0.93        43
      benign       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

-------------------------------------------------------
Tree B (Max_depth = 3)
Accuracy :  0.9473684210526315
              precision    recall  f1-score   support

   malignant       0.95      0.91      0.93        43
      benign       0.95      0.97      0.96        71

    accuracy                           0.95       114
   macro avg       0.95      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

-------------------------------------------------------
Tree C (Min_samples_leaf = 5)
Accuracy :  0.956140350877193
              precision    recall  f1-score   support

   malignant    