# Evaluate ML framework for decision tree models

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.cm import ScalarMappable
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV, cross_val_score
import sklearn
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import seaborn as sns
import plotly.express as px
from skopt.plots import plot_convergence
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
from collections import Counter
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
import xgboost.sklearn as xgb
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.inspection import permutation_importance
from sklearn.neural_network import MLPClassifier
from skopt.space import Real, Integer, Categorical
from skopt.utils import use_named_args
from skopt import gp_minimize
from itertools import product
from sklearn.utils import resample
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import make_scorer
from tabulate import tabulate
import warnings
import os
from mlxai4cat.utils.data import prepare_dataset, stratified_sampling, resampling 
from mlxai4cat.utils.visualization import get_formatted_results, plot_feature_importance, plot_feature_importance_distribution
# Silence warnings for the rest of the session.
# NOTE(review): the blanket filter on the next line already ignores every
# category, so the skopt-specific filter after it adds nothing on top of it.
# The blanket filter also hides warnings that may be worth seeing while
# developing (e.g. undefined-metric warnings) — consider narrowing it.
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=UserWarning, module="skopt")

In [None]:
# IPython-only: load the autoreload extension and reload imported modules
# before each cell runs, so edits to the project helpers are picked up.
%load_ext autoreload
%autoreload 2

## Import data

In [None]:
# Load the catalyst data from CSV through the project helper and unpack the
# eight objects it returns.
# NOTE(review): judging by the names, X / y cover all samples while the
# *_pos / *_neg pairs hold the positive and non-positive subsets — confirm
# against prepare_dataset.
dataset, X, y, X_pos, y_pos, X_neg, y_neg, feature_names = prepare_dataset('../data/ocm_cat_data.csv')


In [None]:
# Show the loaded dataset as the cell output.
dataset

In [None]:
# Pairwise correlation of the numeric columns, computed once and reused for
# the plot (a bare expression in the middle of a cell is never displayed, so
# computing it separately served no purpose).
corr_matrix = dataset.corr(numeric_only=True)

# Plot the heatmap of the correlation matrix; center=0 anchors the diverging
# colormap at zero correlation.
plt.figure(figsize=(20, 20))
sns.heatmap(corr_matrix, annot=True, fmt=".1f", cmap='coolwarm', center=0)

## Checking robustness of decision tree's performance metrics

In [None]:
# Baseline robustness check: fit an unpruned decision tree on n different
# random (non-stratified) 80/20 splits and look at the spread of the scores.
n = 100
acc = []
f1 = []

for rs in range(n):
    # Each repetition gets its own reproducible split seed and tree seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=rs * 1234 + 567
    )
    clf = DecisionTreeClassifier(random_state=rs).fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc.append(accuracy_score(y_test, y_pred))
    f1.append(f1_score(y_test, y_pred))

# print(f'Accuracy scores: {acc}')
# print(f'F1 scores: {f1}')
print(f'Accuracy scores mean: {np.mean(acc)}')
print(f'Accuracy scores standard deviation: {np.std(acc)}')
print(f'F1 scores mean: {np.mean(f1)}')
# Label fixed: this line reports the standard deviation, not a "mean standard deviation".
print(f'F1 scores standard deviation: {np.std(f1)}')

In [None]:
# colors = ['#377eb8', '#ff7f00', '#4daf4a', '#984ea3']

In [None]:
# Histogram of accuracy and F1 over the random-split runs, with each mean
# marked by a dashed vertical line.
mean_acc = np.mean(acc)
mean_f1 = np.mean(f1)

plt.figure(figsize=(8, 6))
plt.axvline(mean_acc, color='black', linestyle='dashed', linewidth=3)  # Mean of the distribution
plt.axvline(mean_f1, color='black', linestyle='dashed', linewidth=3)
# Text labels sit slightly left of their line; y positions are in data units
# and rely on the fixed y-limits set below.
plt.text(mean_acc - 0.05, 19, f'Mean Acc: {mean_acc:.2f}', fontsize=14, color='black')
plt.text(mean_f1 - 0.05, 16, f'Mean F1: {mean_f1:.2f}', fontsize=14, color='black')
plt.hist(np.array(acc), bins=14, color='#c0c0c0', label='Accuracy')
plt.hist(np.array(f1), bins=20, color='#3cb371', label='F1-score')
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel('Distribution of performance metrics', fontsize=16)
plt.ylabel('Frequency', fontsize=16)
plt.legend(fontsize=14)
plt.xlim(0, 1)
plt.ylim(0, 20)
# savefig does not create missing directories; make sure the target exists
# so a fresh checkout does not fail here with FileNotFoundError.
os.makedirs('../figures', exist_ok=True)
plt.savefig('../figures/first_DT_accuracy_distribution_without_group.png')
plt.show()

### Stratified sampling

In the whole dataset, there are 51 positive catalysts and 240 non-positive catalysts. To avoid extremely unbalanced data in the training set or the testing set due to a random split, we use stratified sampling to ensure that the training set and the testing set have the same proportion of positive catalysts as the original data set.

### Resampling

To further overcome the class imbalance, we resample the training data set (size=232, Pos:Non_Pos = 41:191) through a combination of oversampling and undersampling. We first oversample with SMOTE, the most popular method, using a ratio of 0.6 ≈ (232/2) : 191. This ratio is the desired number of samples in the minority class divided by the number of samples in the majority class after resampling. We then apply random undersampling with a ratio of 1 to ensure that the two classes have equal sample sizes.

In [None]:
# Double-check the resampling step: count how many rows of the resampled
# training set are synthetic, i.e. not present in the original data.
#
# The split index is pinned explicitly instead of relying on the loop variable
# `rs` left behind by an earlier cell. 99 is the value that variable holds
# after the 100-iteration loop above, so a top-to-bottom run uses the same
# split as before, and the cell no longer fails or silently changes when run
# on its own.
check_split = 99
X_train, y_train, X_test, y_test = stratified_sampling(X_pos, X_neg, y_pos, y_neg, check_split * 1234 + 567)
X_train, y_train = resampling(X_train, y_train, overratio=0.6, underratio=1, randomstate=123)

# Work on plain 2-D arrays: iterating a DataFrame directly would yield column
# labels rather than rows, so convert first to support either container.
X_all_arr = np.asarray(X)
X_train_arr = np.asarray(X_train)

# row_matches[i, j] is True when resampled row i equals original row j
# (exact comparison over all features).
row_matches = (X_train_arr[:, None, :] == X_all_arr[None, :, :]).all(axis=2)

# Indices of resampled rows that coincide with at least one original row.
# Counting per resampled row (rather than the first hit per original row)
# keeps duplicated originals from being reported as synthetic.
original_indices = np.flatnonzero(row_matches.any(axis=1))
num_new_samples = len(X_train_arr) - len(original_indices)

# Output results
if row_matches.any(axis=0).all():
    # Every row of the full dataset was found in the resampled training set.
    print("All original samples are in the resampled dataset")
print(f"There are {num_new_samples} new, synthetically generated samples in the resampled dataset")


### Decision tree training and nested evaluation

In [None]:
# Unpruned decision tree evaluated on n stratified splits. Only the split seed
# changes between iterations; the tree's own random state stays at 0 so the
# runs are comparable.

n = 100

# Per-split evaluation metrics
acc_tree, precision_tree, recall_tree, f1_tree = [], [], [], []

# Per-split feature importance vectors
feature_importances_tree = []

for rs in range(n):
    split_seed = rs * 1234 + 567
    X_train, y_train, X_test, y_test = stratified_sampling(X_pos, X_neg, y_pos, y_neg, split_seed)
    # Only the training part is rebalanced; the test part keeps its original class ratio.
    X_train, y_train = resampling(X_train, y_train, overratio=0.6, underratio=1, randomstate=123)

    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Score this split
    acc_tree.append(accuracy_score(y_test, y_pred))
    precision_tree.append(precision_score(y_test, y_pred, zero_division=1))
    recall_tree.append(recall_score(y_test, y_pred))
    f1_tree.append(f1_score(y_test, y_pred))

    # Keep the fitted tree's feature importances for this split
    feature_importances_tree.append(clf.feature_importances_)

    # Draw one example tree (split index 50) and skip plotting otherwise
    if rs != 50:
        continue
    plt.figure(figsize=(30, 15))
    plot_tree(clf, filled=False, fontsize=10, feature_names=feature_names)
    plt.title("A sample decision tree on training set")
    plt.show()

### Display different performance metrics for the decision tree model

In [None]:
# Summarise the per-split scores of the unpruned tree with the project helper.
# df_metrics=None starts a fresh results table; later cells pass df_metrics
# back in to add further models.
# NOTE(review): presumably returns a DataFrame with one row per model —
# confirm against get_formatted_results.
df_metrics = get_formatted_results(acc_tree, f1_tree, precision_tree, recall_tree, model_name="Decision Tree", verbose=True, df_metrics = None)

In [None]:
# Show the metrics table as the cell output.
df_metrics

### Plot feature importances obtained using the decision tree's own impurity measure

In [None]:
# Plot the per-split feature importances of the unpruned tree with the project
# helper, passing '../figures' as its save directory.
# df_feature_importance=None starts a fresh table; later cells pass the result
# back in to add further models.
df_feature_importance = plot_feature_importance(feature_importances_tree, feature_names, model_name="Decision tree", df_feature_importance=None, savedir='../figures')
df_feature_importance

In [None]:
# Plot how the feature importances are distributed across the splits
# (project helper; '../figures' is passed as its save directory).
plot_feature_importance_distribution(feature_importances_tree, feature_names, "Decision Tree", color='gray', savedir='../figures')

### Display distribution of scores using stratified sampling and resampling

In [None]:
# Histogram of accuracy and F1 for the stratified + resampled runs, with each
# mean marked by a dashed vertical line.
mean_acc_tree = np.mean(acc_tree)
mean_f1_tree = np.mean(f1_tree)

plt.figure(figsize=(8, 6))
plt.axvline(mean_acc_tree, color='black', linestyle='dashed', linewidth=3)
plt.axvline(mean_f1_tree, color='black', linestyle='dashed', linewidth=3)
# Text labels sit slightly left of their line; y positions are in data units
# and rely on the fixed y-limits set below.
plt.text(mean_acc_tree - 0.05, 19, f'Mean Acc: {mean_acc_tree:.2f}', fontsize=14, color='black')
plt.text(mean_f1_tree - 0.05, 16, f'Mean F1: {mean_f1_tree:.2f}', fontsize=14, color='black')

plt.hist(np.array(acc_tree), bins=14, color='#c0c0c0', label='Accuracy')
plt.hist(np.array(f1_tree), bins=20, color='#3cb371', label='F1-score')
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel('Distribution of performance metrics', fontsize=16)
plt.ylabel('Frequency', fontsize=16)
plt.legend(fontsize=14)
plt.xlim(0, 1)
plt.ylim(0, 20)
# savefig does not create missing directories; make sure the target exists.
os.makedirs('../figures', exist_ok=True)
plt.savefig('../figures/pipeline_DT_accuracy_distribution_without_group.png', dpi=300)

## Decision Tree without RESAMPLING

### Training and nested-cross validation

In [None]:
# Unpruned decision tree on n stratified splits, trained on the training part
# as-is (no resampling), followed by a summary table of the scores.
n = 100

# Per-split evaluation metrics and feature importances
acc_tree_nr, precision_tree_nr, recall_tree_nr, f1_tree_nr = [], [], [], []
feature_importances_tree_nr = []

for rs in range(n):
    split_seed = rs * 1234 + 567
    X_train, y_train, X_test, y_test = stratified_sampling(X_pos, X_neg, y_pos, y_neg, split_seed)

    clf_nr = DecisionTreeClassifier(random_state=0)
    clf_nr.fit(X_train, y_train)
    y_pred_nr = clf_nr.predict(X_test)

    # Score this split
    acc_tree_nr.append(accuracy_score(y_test, y_pred_nr))
    precision_tree_nr.append(precision_score(y_test, y_pred_nr, zero_division=1))
    recall_tree_nr.append(recall_score(y_test, y_pred_nr))
    f1_tree_nr.append(f1_score(y_test, y_pred_nr))

    # Keep the fitted tree's feature importances for this split
    feature_importances_tree_nr.append(clf_nr.feature_importances_)


# EVALUATION METRICS
# Mean and (population) standard deviation of each metric across the splits
mean_acc_tree_nr, std_acc_tree_nr = np.mean(acc_tree_nr), np.std(acc_tree_nr)
mean_f1_tree_nr, std_f1_tree_nr = np.mean(f1_tree_nr), np.std(f1_tree_nr)
mean_precision_tree_nr, std_precision_tree_nr = np.mean(precision_tree_nr), np.std(precision_tree_nr)
mean_recall_tree_nr, std_recall_tree_nr = np.mean(recall_tree_nr), np.std(recall_tree_nr)

# Header row followed by one (name, mean, std) row per metric
table_data_nr = [('Metric', 'Mean', 'Standard Deviation')]
table_data_nr.append(('Accuracy', mean_acc_tree_nr, std_acc_tree_nr))
table_data_nr.append(('F1 Score', mean_f1_tree_nr, std_f1_tree_nr))
table_data_nr.append(('Precision', mean_precision_tree_nr, std_precision_tree_nr))
table_data_nr.append(('Recall', mean_recall_tree_nr, std_recall_tree_nr))

# Print the table
print(tabulate(table_data_nr, headers='firstrow', tablefmt='fancy_grid'))

# Single-row record for this model; later cells extend the resulting frame
data_nr = dict(
    Model=['Decision Tree'],
    Accuracy_Mean=[mean_acc_tree_nr],
    Accuracy_Std=[std_acc_tree_nr],
    F1_Mean=[mean_f1_tree_nr],
    F1_Std=[std_f1_tree_nr],
    Precision_Mean=[mean_precision_tree_nr],
    Precision_Std=[std_precision_tree_nr],
    Recall_Mean=[mean_recall_tree_nr],
    Recall_Std=[std_recall_tree_nr],
)

# Build and display the DataFrame
df_metrics_nr = pd.DataFrame(data_nr)
print(df_metrics_nr)


### Feature importance scores for decision tree without resampling

In [None]:
# Plot the feature importances of the unpruned tree trained without resampling
# and start a separate table for the no-resampling models.
# NOTE(review): model_name and savedir are identical to the resampled call
# earlier in the notebook; if the helper derives its output file name from
# them, this call would overwrite that figure — confirm against
# plot_feature_importance.
df_feature_importance_nr = plot_feature_importance(feature_importances_tree_nr, feature_names, model_name="Decision tree", df_feature_importance=None, savedir='../figures')
df_feature_importance_nr

## DT with Pre-Pruning

### Training and nested-cross validation

In [None]:
n = 100

# Per-split test metrics of the pre-pruned tree
acc_preprun, precision_preprun, recall_preprun, f1_preprun = [], [], [], []

# Hyperparameters selected by the search on each split
max_depth_preprun, min_samples_split_preprun, min_samples_leaf_preprun = [], [], []

# Per-split feature importance vectors
feature_importances_preprun = []

# Objective for the hyperparameter search: the negated mean cross-validated F1
def objective(params):
    """Return the negated mean 5-fold cross-validated F1 for one candidate tree.

    ``params`` is unpacked as (max_depth, min_samples_split, min_samples_leaf).
    The function reads the module-level ``X_train`` / ``y_train`` at call time,
    so it always scores against whatever split the calling loop has just set.
    The score is negated because the optimizer minimizes its objective.
    """
    depth, split_min, leaf_min = params
    candidate = DecisionTreeClassifier(
        random_state=0,
        max_depth=depth,
        min_samples_split=split_min,
        min_samples_leaf=leaf_min,
    )
    fold_scores = cross_val_score(candidate, X_train, y_train, cv=5, n_jobs=-1, scoring="f1")
    return -np.mean(fold_scores)

# Pre-pruned decision tree: for every stratified split, tune the pruning
# hyperparameters with Bayesian optimisation on the resampled training data,
# then refit with the best setting and score on the untouched test part.
for rs in range(n):
    split_seed = rs * 1234 + 567
    X_train, y_train, X_test, y_test = stratified_sampling(X_pos, X_neg, y_pos, y_neg, split_seed)
    X_train, y_train = resampling(X_train, y_train, overratio=0.6, underratio=1, randomstate=123)

    # Search space, in the order objective() unpacks its params
    space = [
        Integer(1, 10, name='max_depth'),
        Integer(2, 20, name='min_samples_split'),
        Integer(1, 20, name='min_samples_leaf'),
    ]

    # objective() reads the X_train / y_train assigned just above
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        res_gp = gp_minimize(objective, space, n_calls=30, random_state=0, n_initial_points=10)

    # Best point found, in search-space order
    best_depth, best_split, best_leaf = res_gp.x[0], res_gp.x[1], res_gp.x[2]
    max_depth_preprun.append(best_depth)
    min_samples_split_preprun.append(best_split)
    min_samples_leaf_preprun.append(best_leaf)

    clf_preprun_optim = DecisionTreeClassifier(
        random_state=0,
        max_depth=best_depth,
        min_samples_split=best_split,
        min_samples_leaf=best_leaf,
    )
    clf_preprun_optim.fit(X_train, y_train)
    y_pred = clf_preprun_optim.predict(X_test)

    # Score this split
    acc_preprun.append(accuracy_score(y_test, y_pred))
    precision_preprun.append(precision_score(y_test, y_pred, zero_division=1))
    recall_preprun.append(recall_score(y_test, y_pred))
    f1_preprun.append(f1_score(y_test, y_pred))

    # Keep the tuned tree's feature importances for this split
    feature_importances_preprun.append(clf_preprun_optim.feature_importances_)

    # Progress message every 10 splits
    if rs % 10 == 0:
        print("Split %s" % rs)

    # Draw one example tree (split index 1)
    if rs == 1:
        plt.figure(figsize=(24, 12))
        plot_tree(clf_preprun_optim, filled=False, fontsize=6, feature_names=feature_names)
        plt.title("Decision tree with pre-pruning on training set (random split)")
        plt.show()



### Performance metrics and importance scores for pre-pruned decision tree

In [None]:
# Summarise the pre-pruned tree's scores, passing the existing df_metrics
# table to the helper, then show the table.
df_metrics = get_formatted_results(acc_preprun, f1_preprun, precision_preprun, recall_preprun, model_name="DT prepruned", verbose=True, df_metrics=df_metrics)
df_metrics

In [None]:
# Plot the pre-pruned tree's feature importances, passing the existing
# feature-importance table to the helper, then show the table.
df_feature_importance = plot_feature_importance(feature_importances_preprun, feature_names, model_name="DT prepruned", df_feature_importance=df_feature_importance, savedir='../figures')
df_feature_importance

In [None]:
# Plot how the pre-pruned tree's feature importances are distributed across the splits.
plot_feature_importance_distribution(feature_importances_preprun, feature_names, "Decision Tree with prepruning", color='gray', savedir='../figures')

## Pre-Pruned DT without Resampling

### Training and nested-cross validation

In [None]:
 n = 100

# Initialize lists to store evaluation metrics
acc_preprun_nr = []
precision_preprun_nr = []
recall_preprun_nr = []
f1_preprun_nr = []
max_depth_preprun_nr = []
min_samples_split_preprun_nr = []
min_samples_leaf_preprun_nr = []

# Initialize a list to store feature importance values for each split
feature_importances_preprun_nr = []

# Objective for the hyperparameter search: the negated mean cross-validated F1
def objective(params):
    """Return the negated mean 5-fold cross-validated F1 for one candidate tree.

    ``params`` is unpacked as (max_depth, min_samples_split, min_samples_leaf).
    The function reads the module-level ``X_train`` / ``y_train`` at call time;
    in this section those hold the training split without resampling.
    The score is negated because the optimizer minimizes its objective.
    """
    depth, split_min, leaf_min = params
    candidate_nr = DecisionTreeClassifier(
        random_state=0,
        max_depth=depth,
        min_samples_split=split_min,
        min_samples_leaf=leaf_min,
    )
    fold_scores = cross_val_score(candidate_nr, X_train, y_train, cv=5, n_jobs=-1, scoring="f1")
    return -np.mean(fold_scores)

# Pre-pruned decision tree without resampling: for every stratified split,
# tune the pruning hyperparameters on the raw training part, refit with the
# best setting and score on the test part.
for rs in range(n):
    split_seed = rs * 1234 + 567
    X_train, y_train, X_test, y_test = stratified_sampling(X_pos, X_neg, y_pos, y_neg, split_seed)

    # Search space, in the order objective() unpacks its params
    space = [
        Integer(1, 10, name='max_depth'),
        Integer(2, 20, name='min_samples_split'),
        Integer(1, 20, name='min_samples_leaf'),
    ]

    # objective() reads the X_train / y_train assigned just above
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        res_gp = gp_minimize(objective, space, n_calls=30, random_state=0, n_initial_points=10)

    # Best point found, in search-space order
    best_depth, best_split, best_leaf = res_gp.x[0], res_gp.x[1], res_gp.x[2]
    max_depth_preprun_nr.append(best_depth)
    min_samples_split_preprun_nr.append(best_split)
    min_samples_leaf_preprun_nr.append(best_leaf)

    clf_preprun_optim_nr = DecisionTreeClassifier(
        random_state=0,
        max_depth=best_depth,
        min_samples_split=best_split,
        min_samples_leaf=best_leaf,
    )
    clf_preprun_optim_nr.fit(X_train, y_train)
    y_pred_nr = clf_preprun_optim_nr.predict(X_test)

    # Score this split
    acc_preprun_nr.append(accuracy_score(y_test, y_pred_nr))
    precision_preprun_nr.append(precision_score(y_test, y_pred_nr, zero_division=1))
    recall_preprun_nr.append(recall_score(y_test, y_pred_nr))
    f1_preprun_nr.append(f1_score(y_test, y_pred_nr))

    # Keep the tuned tree's feature importances for this split
    feature_importances_preprun_nr.append(clf_preprun_optim_nr.feature_importances_)

    # Progress message every 25 splits
    if rs % 25 == 0:
        print("Split %s" % rs)




In [None]:
# Summarise the no-resampling pre-pruned scores, passing in the df_metrics_nr
# table that was built by hand for the unpruned tree, then show the table.
# NOTE(review): this relies on the helper using the same column names as that
# hand-built frame (Model, Accuracy_Mean, ...) — confirm against
# get_formatted_results.
df_metrics_nr = get_formatted_results(acc_preprun_nr, f1_preprun_nr, precision_preprun_nr, recall_preprun_nr, model_name="DT prepruned", verbose=True, df_metrics = df_metrics_nr)
df_metrics_nr

In [None]:
# Plot the no-resampling pre-pruned feature importances, passing the existing
# no-resampling table to the helper, then show the table.
# NOTE(review): model_name and savedir match the resampled "DT prepruned"
# call; if the helper derives its file name from them, that figure would be
# overwritten — confirm against plot_feature_importance.
df_feature_importance_nr = plot_feature_importance(feature_importances_preprun_nr, feature_names, model_name="DT prepruned", df_feature_importance=df_feature_importance_nr, savedir='../figures')
df_feature_importance_nr

## Decision tree with post-pruning

### Training and nested evaluation

In [None]:
# Post-pruned decision tree (minimal cost-complexity pruning) on n stratified
# splits with a resampled training part.
#
# The pruning strength ccp_alpha is chosen by 5-fold cross-validated F1 on the
# training data only — the same inner criterion the pre-pruned search uses.
# Picking alpha by its score on the test split would leak test labels into
# model selection and bias every reported metric upward.
n = 100
acc_postprun = []
precision_postprun = []
recall_postprun = []
f1_postprun = []
alpha_postprun = []
feature_importances_postprun = []

for rs in range(n):
    X_train, y_train, X_test, y_test = stratified_sampling(X_pos, X_neg, y_pos, y_neg, rs * 1234 + 567)
    X_train, y_train = resampling(X_train, y_train, overratio=0.6, underratio=1, randomstate=123)

    # Candidate alphas come from the pruning path of the fully grown tree.
    clf = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
    path = clf.cost_complexity_pruning_path(X_train, y_train)
    ccp_alphas = path.ccp_alphas

    # The last alpha prunes the tree down to its root, so it is dropped —
    # unless it is the only candidate, in which case dropping it would leave
    # nothing to choose from and np.argmax would fail on an empty list.
    if len(ccp_alphas) > 1:
        ccp_alphas = ccp_alphas[:-1]

    # Inner model selection: mean cross-validated F1 per candidate alpha.
    cv_f1 = [
        np.mean(
            cross_val_score(
                tree.DecisionTreeClassifier(random_state=0, ccp_alpha=ccp_alpha),
                X_train, y_train, cv=5, scoring="f1",
            )
        )
        for ccp_alpha in ccp_alphas
    ]

    # np.argmax returns the first maximum, i.e. the smallest alpha on ties.
    alpha = ccp_alphas[np.argmax(cv_f1)]
    alpha_postprun.append(alpha)

    # Refit on the whole training part with the selected alpha, then evaluate
    # once on the held-out test part.
    clf_postprun = tree.DecisionTreeClassifier(random_state=0, ccp_alpha=alpha)
    clf_postprun.fit(X_train, y_train)
    y_pred_postprun = clf_postprun.predict(X_test)

    acc_postprun.append(accuracy_score(y_test, y_pred_postprun))
    precision_postprun.append(precision_score(y_test, y_pred_postprun, zero_division=1))
    recall_postprun.append(recall_score(y_test, y_pred_postprun))
    f1_postprun.append(f1_score(y_test, y_pred_postprun))

    # Store feature importance for this split
    feature_importances_postprun.append(clf_postprun.feature_importances_)

    if rs % 50 == 0:
        print("Split %s" % rs)

    # Show an example tree
    if rs == 1:
        plt.figure(figsize=(24, 12))
        plot_tree(clf_postprun, filled=False, fontsize=6, feature_names=feature_names)
        plt.title("Decision tree with post-pruning on training set (random split)")
        plt.show()



In [None]:
### Performance metrics and importance scores for post-pruned decision tree

In [None]:
# Summarise the post-pruned tree's scores, passing the existing df_metrics
# table to the helper, then show the table.
df_metrics = get_formatted_results(acc_postprun, f1_postprun, precision_postprun, recall_postprun, model_name="DT postpruned", verbose=True, df_metrics=df_metrics)
df_metrics 

In [None]:
# Plot the post-pruned tree's feature importances, passing the existing
# feature-importance table to the helper, then show the table.
df_feature_importance = plot_feature_importance(feature_importances_postprun, feature_names, model_name="DT postpruned", df_feature_importance=df_feature_importance, savedir='../figures')
df_feature_importance

In [None]:
# Plot how the post-pruned tree's feature importances are distributed across the splits.
plot_feature_importance_distribution(feature_importances_postprun, feature_names, "Decision Tree with postpruning", color='gray', savedir='../figures')

In [None]:
# Reshape the table to long form: every column other than 'Feature' becomes a
# (Model, Importance) pair.
df_melted = pd.melt(
    df_feature_importance,
    id_vars='Feature',
    var_name='Model',
    value_name='Importance',
)

# Grouped bar chart: one bar per model for every feature
axis_labels = {'Importance': 'Feature Importance', 'Model': 'Model'}
fig = px.bar(
    df_melted,
    x='Feature',
    y='Importance',
    color='Model',
    labels=axis_labels,
    title='Feature Importance Comparison',
    barmode='group',
)

# Spacing between feature groups and between bars inside a group
fig.update_layout(bargap=0.3, bargroupgap=0.3)

# Render the figure
fig.show()

## Postprune without Resampling

### Training and nested evaluation

In [None]:
# Post-pruned decision tree (minimal cost-complexity pruning) on n stratified
# splits, trained on the training part as-is (no resampling).
#
# The pruning strength ccp_alpha is chosen by 5-fold cross-validated F1 on the
# training data only. Picking alpha by its score on the test split would leak
# test labels into model selection and bias every reported metric upward.

# Set the number of iterations
n = 100

# Initialize lists for metrics and feature importances
acc_postprun_nr = []
precision_postprun_nr = []
recall_postprun_nr = []
f1_postprun_nr = []
alpha_postprun_nr = []
feature_importances_postprun_nr = []

# Loop through iterations
for rs in range(n):
    # Stratified sampling without resampling
    X_train, y_train, X_test, y_test = stratified_sampling(X_pos, X_neg, y_pos, y_neg, rs * 1234 + 567)

    # Candidate alphas come from the pruning path of the fully grown tree.
    clf = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
    path = clf.cost_complexity_pruning_path(X_train, y_train)
    ccp_alphas = path.ccp_alphas

    # The last alpha prunes the tree down to its root, so it is dropped —
    # unless it is the only candidate, in which case dropping it would leave
    # nothing to choose from and np.argmax would fail on an empty list.
    if len(ccp_alphas) > 1:
        ccp_alphas = ccp_alphas[:-1]

    # Inner model selection: mean cross-validated F1 per candidate alpha.
    cv_f1 = [
        np.mean(
            cross_val_score(
                tree.DecisionTreeClassifier(random_state=0, ccp_alpha=ccp_alpha),
                X_train, y_train, cv=5, scoring="f1",
            )
        )
        for ccp_alpha in ccp_alphas
    ]

    # np.argmax returns the first maximum, i.e. the smallest alpha on ties.
    alpha = ccp_alphas[np.argmax(cv_f1)]
    alpha_postprun_nr.append(alpha)

    # Refit on the whole training part with the selected alpha, then evaluate
    # once on the held-out test part.
    clf_postprun = tree.DecisionTreeClassifier(random_state=0, ccp_alpha=alpha).fit(X_train, y_train)
    y_pred_postprun = clf_postprun.predict(X_test)

    # Calculate evaluation metrics and store them
    acc_postprun_nr.append(accuracy_score(y_test, y_pred_postprun))
    precision_postprun_nr.append(precision_score(y_test, y_pred_postprun, zero_division=1))
    recall_postprun_nr.append(recall_score(y_test, y_pred_postprun))
    f1_postprun_nr.append(f1_score(y_test, y_pred_postprun))

    # Store feature importance for this split
    feature_importances_postprun_nr.append(clf_postprun.feature_importances_)

    if rs % 25 == 0:
        print("Split %s" % rs)


In [None]:
# Summarise the no-resampling post-pruned scores, passing the existing
# df_metrics_nr table to the helper.
df_metrics_nr = get_formatted_results(acc_postprun_nr, f1_postprun_nr, precision_postprun_nr, recall_postprun_nr, model_name="DT postpruned", verbose=True, df_metrics = df_metrics_nr)


In [None]:
# Plot the no-resampling post-pruned feature importances, passing the existing
# no-resampling table to the helper, then show the table.
# NOTE(review): model_name and savedir match the resampled "DT postpruned"
# call; if the helper derives its file name from them, that figure would be
# overwritten — confirm against plot_feature_importance.
df_feature_importance_nr = plot_feature_importance(feature_importances_postprun_nr, feature_names, model_name="DT postpruned", df_feature_importance=df_feature_importance_nr, savedir='../figures')
df_feature_importance_nr

In [None]:
# Reshape the no-resampling table to long form: every column other than
# 'Feature' becomes a (Model, Importance) pair.
df_melted_nr = pd.melt(
    df_feature_importance_nr,
    id_vars='Feature',
    var_name='Model',
    value_name='Importance',
)

# Grouped bar chart: one bar per model for every feature
axis_labels = {'Importance': 'Feature Importance', 'Model': 'Model'}
fig = px.bar(
    df_melted_nr,
    x='Feature',
    y='Importance',
    color='Model',
    labels=axis_labels,
    title='Feature Importance Comparison without Resampling',
    barmode='group',
)

# Spacing between feature groups and between bars inside a group
fig.update_layout(bargap=0.3, bargroupgap=0.3)

# Render the figure
fig.show()

### Save decision tree evaluation and importance score results

In [None]:
# Write the metric and feature-importance tables, with and without resampling,
# to CSV files under ../results.
# makedirs(exist_ok=True) replaces the separate exists()/mkdir() pair: it does
# nothing when the directory is already there and has no gap between the
# check and the creation.
os.makedirs('../results', exist_ok=True)
df_metrics.to_csv('../results/DT_metrics_results.csv', index=False)
df_metrics_nr.to_csv('../results/DT_metrics_NO_Resampling_results.csv', index=False)
df_feature_importance.to_csv('../results/DT_feature_imp_with_sklearn_results.csv', index=False)
df_feature_importance_nr.to_csv('../results/DT_feature_imp_with_sklearn_NO_Resampling_results.csv', index=False)


## DONE