In [4]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, recall_score, confusion_matrix
from sklearn.model_selection import train_test_split
import time

df = pd.read_csv('breast-cancer.csv')

# Preprocess the data if needed
df_cleaned = df.dropna()

# Split features and target variable
X = df_cleaned.drop(columns=['diagnosis'])
y = df_cleaned['diagnosis']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Track training time
start_time = time.time()

# Train the Decision Tree Classifier
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Calculate training time
training_time = time.time() - start_time

# Make predictions on the testing set
predictions = model.predict(X_test)

# Calculate performance metrics
accuracy = accuracy_score(y_test, predictions)
sensitivity = recall_score(y_test, predictions, pos_label="M")
cm = confusion_matrix(y_test, predictions)
tn, fp, fn, tp = cm.ravel()
specificity = tn / (tn + fp)

# Visualize the Decision Tree (optional)
# tree.plot_tree(model)

# Print performance metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Sensitivity (Recall): {sensitivity:.2f}")
print(f"Specificity: {specificity:.2f}")
print(f"Confusion Matrix:\n{cm}")
print(f"Training Time: {training_time:.2f} seconds")


Accuracy: 0.90
Sensitivity (Recall): 0.84
Specificity: 0.95
Confusion Matrix:
[[62  3]
 [ 8 41]]
Training Time: 0.02 seconds
