In [None]:
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.tree import DecisionTreeClassifier

import pandas as pd

In [None]:
# Load the dataset from the CSV file in the notebook's working directory.
df = pd.read_csv('dataset_semNull.csv')

# Preview only the first rows rather than rendering the whole frame,
# which keeps the saved notebook output small.
df.head()

In [None]:
# Feature columns: 'dev-01' through 'dev-11'.
feature_columns = [f'dev-{n:02d}' for n in range(1, 12)]

# Target columns: one column per label the model has to predict.
target_columns = [
    'SEC01-BP01', 'SEC01-BP06',
    'SEC02-BP02', 'SEC02-BP03', 'SEC02-BP05',
    'SEC03-BP07', 'SEC03-BP08',
    'SEC04-BP02', 'SEC04-BP03',
    'SEC11-BP01', 'SEC11-BP02', 'SEC11-BP03', 'SEC11-BP04',
    'SEC11-BP05', 'SEC11-BP06', 'SEC11-BP07', 'SEC11-BP08',
]

# Split the loaded frame into the model inputs (X) and the labels (y).
X = df[feature_columns]
y = df[target_columns]


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the dimensions of every partition.
print("Shapes of training and testing sets:")
for label, part in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{label} shape:", part.shape)

In [None]:
# Base learner. Tree construction is randomized (features are permuted at
# each split), so pin random_state to make the fitted trees, the metrics
# and the saved plots repeatable across runs.
base_classifier = DecisionTreeClassifier(random_state=42)

# MultiOutputClassifier fits one independent copy of the base learner per
# target column, which is how the multilabel problem is handled here.
multi_label_classifier = MultiOutputClassifier(base_classifier)

In [None]:
# Train on the training split, then predict the labels of the held-out rows.
# fit() returns the fitted estimator itself, so both steps can be chained.
y_pred = multi_label_classifier.fit(X_train, y_train).predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# NOTE: for multilabel input accuracy_score is *subset* accuracy: a row only
# counts as correct when every one of its labels is predicted correctly.
# Assumes the target columns are binary indicators - TODO confirm.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Pass the target column names so each row of the report is labelled with
# the target it describes instead of a bare positional index.
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=list(y_test.columns)))

In [None]:
import os
from sklearn.tree import plot_tree
from matplotlib import pyplot as plt

# Take the names from the frames the model was trained on, so the plot
# labels cannot drift away from the columns actually used for fitting.
featureNames = list(X.columns)
targetNames = list(y.columns)

# Create a directory to save decision tree images
output_dir = 'decision_trees'
os.makedirs(output_dir, exist_ok=True)

# estimators_ holds one fitted tree per target column, in column order, so
# pairing it with targetNames tells us which target each tree predicts.
for i, (estimator, target_name) in enumerate(
    zip(multi_label_classifier.estimators_, targetNames)
):
    fig, ax = plt.subplots(figsize=(12, 8))

    plot_tree(
        estimator,
        feature_names=featureNames,
        # class_names must name the classes of THIS tree's single target.
        # Passing the list of target columns here would mislabel every leaf
        # with the first few target names, so use the estimator's classes.
        class_names=[str(label) for label in estimator.classes_],
        filled=True,
        ax=ax,
    )
    # The target name belongs in the title, identifying which label the
    # tree was trained to predict.
    ax.set_title(f"Decision Tree - Estimator {i+1} ({target_name})")

    # Save the plot as an image
    image_path = os.path.join(output_dir, f"decision_tree_{i+1}.png")
    fig.savefig(image_path)

    # Display the plot (optional)
    plt.show()

    # Close this specific figure so memory is released before the next tree.
    plt.close(fig)

print("Decision tree images saved successfully.")