In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Fetch the breast-cancer bunch and wrap features/labels in pandas containers
data = load_breast_cancer()
y = pd.Series(data.target)
X = pd.DataFrame(data=data.data, columns=data.feature_names)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [3]:
# Decision tree with default hyper-parameters and a fixed seed
tree_classifier = DecisionTreeClassifier(random_state=42)

# Fit on the training partition only; kept as the last expression so the
# fitted estimator is still what the cell displays
tree_classifier.fit(X=X_train, y=y_train)

In [4]:
# Predict labels for the held-out rows
y_pred = tree_classifier.predict(X_test)

# Score the predictions against the true test labels
classification_rep = classification_report(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report: headline accuracy first, then the matrix and the per-class table,
# each preceded by a blank line and its heading
print(f"Accuracy: {accuracy:.2f}")
print("\nConfusion Matrix:", confusion, sep="\n")
print("\nClassification Report:", classification_rep, sep="\n")

Accuracy: 0.95

Confusion Matrix:
[[40  3]
 [ 3 68]]

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        43
           1       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114



In [29]:
# Fitted tree structure of the classifier; traverse_tree reads its per-node arrays
tree = tree_classifier.tree_

def traverse_tree(node, X, feature_names, fitted_tree=None):
    """Walk one sample down a fitted decision tree, narrating every split.

    Starting at ``node``, the sample's value for the node's split feature is
    compared with the node threshold, the chosen branch is printed, and the
    walk continues until a leaf is reached. At the leaf the majority class is
    printed using the breast-cancer label encoding.

    Parameters
    ----------
    node : int
        Index of the node to start from (the notebook passes 0).
    X : array-like of shape (1, n_features) or (n_features,)
        The sample to trace. Only the first row is used.
    feature_names : sequence of str
        Names indexed by feature column; used only in the printed messages.
    fitted_tree : object, optional
        Tree structure exposing ``children_left``, ``children_right``,
        ``feature``, ``threshold`` and ``value`` arrays indexed by node
        (e.g. ``DecisionTreeClassifier.tree_``). When omitted, the
        notebook-level ``tree`` variable is used, so existing three-argument
        calls keep working.

    Returns
    -------
    numpy.ndarray
        The ``value`` entry of the leaf that was reached.
    """
    if fitted_tree is None:
        # Backward-compatible fallback to the notebook-level global
        fitted_tree = tree

    # Accept both a 1-D sample and a 2-D single-row array
    sample = np.atleast_2d(X)[0]

    # A node is a leaf when both child pointers hold the same value
    while fitted_tree.children_left[node] != fitted_tree.children_right[node]:
        feature_index = fitted_tree.feature[node]
        threshold = fitted_tree.threshold[node]

        feature_name = feature_names[feature_index]
        feature_value = sample[feature_index]

        print(f"Node: We are examining feature '{feature_name}', whose value {feature_value}, which will be compared to the node threshold {threshold}")

        if feature_value <= threshold:
            print(f"Going left: as feature '{feature_name}' whose value is {feature_value} is less than or equal to node threshold value, {threshold}")
            node = fitted_tree.children_left[node]
        else:
            print(f"Going right: as feature '{feature_name}' whose value is {feature_value} is greater than node threshold value, {threshold}")
            node = fitted_tree.children_right[node]

    leaf_value = fitted_tree.value[node]
    class_weights = leaf_value[0]

    # load_breast_cancer encodes 0 = malignant and 1 = benign, so a leaf
    # dominated by class 1 is a benign prediction (the earlier wording had
    # the two labels swapped). Ties fall to class 0.
    if class_weights[1] > class_weights[0]:
        c = "The model predicts a benign tumour (class 1)"
    else:
        c = "The model predicts a malignant tumour (class 0)"
    print("Leaf Node -", c)
    return leaf_value

# Take the first held-out sample as a 1 x n_features array, plus the column names
feature_names = list(X.columns)
X_new = X_test.iloc[[0]].to_numpy()

# Sanity check left disabled by the author:
#   tree_prediction = tree_classifier.predict(X_new)
#   print(f"Tree Classifier Prediction for the input data: {tree_prediction[0]}")

# Start the walk at node 0 and keep the value returned for the leaf reached
prediction = traverse_tree(node=0, X=X_new, feature_names=feature_names)


Node: We are examining feature 'mean concave points', whose value 0.03821, which will be compared to the node threshold 0.05127999931573868
Going left: as feature 'mean concave points' whose value is 0.03821 is less than or equal to node threshold value, 0.05127999931573868
Node: We are examining feature 'worst radius', whose value 14.97, which will be compared to the node threshold 16.829999923706055
Going left: as feature 'worst radius' whose value is 14.97 is less than or equal to node threshold value, 16.829999923706055
Node: We are examining feature 'area error', whose value 30.29, which will be compared to the node threshold 48.70000076293945
Going left: as feature 'area error' whose value is 30.29 is less than or equal to node threshold value, 48.70000076293945
Node: We are examining feature 'worst smoothness', whose value 0.1426, which will be compared to the node threshold 0.17764999717473984
Going left: as feature 'worst smoothness' whose value is 0.1426 is less than or equal

In [10]:
# Display the feature-name list that was passed to traverse_tree
feature_names

['mean radius',
 'mean texture',
 'mean perimeter',
 'mean area',
 'mean smoothness',
 'mean compactness',
 'mean concavity',
 'mean concave points',
 'mean symmetry',
 'mean fractal dimension',
 'radius error',
 'texture error',
 'perimeter error',
 'area error',
 'smoothness error',
 'compactness error',
 'concavity error',
 'concave points error',
 'symmetry error',
 'fractal dimension error',
 'worst radius',
 'worst texture',
 'worst perimeter',
 'worst area',
 'worst smoothness',
 'worst compactness',
 'worst concavity',
 'worst concave points',
 'worst symmetry',
 'worst fractal dimension']

In [11]:
# Display the single-row sample (first test row, reshaped to 2-D) that was traced through the tree
X_new

array([[1.247e+01, 1.860e+01, 8.109e+01, 4.819e+02, 9.965e-02, 1.058e-01,
        8.005e-02, 3.821e-02, 1.925e-01, 6.373e-02, 3.961e-01, 1.044e+00,
        2.497e+00, 3.029e+01, 6.953e-03, 1.911e-02, 2.701e-02, 1.037e-02,
        1.782e-02, 3.586e-03, 1.497e+01, 2.464e+01, 9.605e+01, 6.779e+02,
        1.426e-01, 2.378e-01, 2.671e-01, 1.015e-01, 3.014e-01, 8.750e-02]])