In [1]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [2]:
data = {'X1': [30,30,30,30,60,60,60,60],
        'X2': [0,0,1,1,0,0,1,1],
        'X3': [10,70,20,80,40,60,50,60],
        'C': [0,0,0,1,0,1,0,1]}

df = pd.DataFrame(data)
df

Unnamed: 0,X1,X2,X3,C
0,30,0,10,0
1,30,0,70,0
2,30,1,20,0
3,30,1,80,1
4,60,0,40,0
5,60,0,60,1
6,60,1,50,0
7,60,1,60,1


In [3]:
# Split the dataset into features (X) and the target (y)
X = df[['X1', 'X2', 'X3']]
y = df['C']

# Create a Decision Tree classifier
classifier = DecisionTreeClassifier(random_state=0)
classifier.fit(X, y)

# Get predicted probabilities for the positive class (class 1)
predicted_probabilities = classifier.predict_proba(X)[:, 1]

# Sort the predicted probabilities and associated labels
sorted_probs, sorted_labels = zip(*sorted(zip(predicted_probabilities, y), reverse=True))

# Initialize variables to keep track of best threshold and performance
best_threshold = None
best_f1_score = 0.0

# Iterate over sorted probabilities to find the threshold
for i in range(len(sorted_probs)):
    threshold = sorted_probs[i]
    predicted_labels = [1 if prob >= threshold else 0 for prob in predicted_probabilities]
    
    # Calculate precision, recall, and F1-score
    precision = precision_score(y, predicted_labels)
    recall = recall_score(y, predicted_labels)
    f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0.0
    
    # Update best threshold if F1-score improves
    if f1_score > best_f1_score:
        best_threshold = threshold
        best_f1_score = f1_score

    print(f"Threshold: {threshold:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1_score:.4f}")

print(f"Best Threshold: {best_threshold:.4f}, Best F1-Score: {best_f1_score:.4f}")


Threshold: 1.0000, Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000
Threshold: 1.0000, Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000
Threshold: 1.0000, Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000
Threshold: 0.0000, Precision: 0.3750, Recall: 1.0000, F1-Score: 0.5455
Threshold: 0.0000, Precision: 0.3750, Recall: 1.0000, F1-Score: 0.5455
Threshold: 0.0000, Precision: 0.3750, Recall: 1.0000, F1-Score: 0.5455
Threshold: 0.0000, Precision: 0.3750, Recall: 1.0000, F1-Score: 0.5455
Threshold: 0.0000, Precision: 0.3750, Recall: 1.0000, F1-Score: 0.5455
Best Threshold: 1.0000, Best F1-Score: 1.0000


In [4]:
import graphviz
from sklearn.tree import export_graphviz

# Export the decision tree to DOT format
dot_data = export_graphviz(classifier, out_file=None, 
                           feature_names=X.columns,  
                           class_names=["Class 0", "Class 1"],  
                           filled=True, rounded=True,  
                           special_characters=True)

# Replace the default node attributes in the DOT data
dot_data = dot_data.replace('node [shape=box, ', 'node [shape=square, style=filled, fillcolor="#0000000", ')

# Create a Graphviz graph from the modified DOT data
graph = graphviz.Source(dot_data)

# Render the graph to a file (e.g., "tree.png")
graph.render("tree")

# Display the graph in a Jupyter Notebook
graph.view()



'tree.pdf'