In [None]:
# Import necessary libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import graphviz
from sklearn.tree import DecisionTreeClassifier, export_text, export_graphviz
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, roc_auc_score

# Set Graphviz Path Manually (If No Admin Access)
# Directory of a manually unpacked Graphviz download; it is added to PATH for
# this process only.
GRAPHVIZ_BIN = r"C:\Users\maths\Downloads\windows_10_cmake_Release_Graphviz-12.2.1-win64\Graphviz-12.2.1-win64\bin"

# Append only when the entry is missing, so re-running this cell does not keep
# growing PATH with duplicate entries. Wrapping both sides in separators makes
# the check match whole PATH entries rather than substrings of longer ones.
# A missing PATH variable is treated as empty instead of raising KeyError.
_search_path = os.environ.get("PATH", "")
if (os.pathsep + GRAPHVIZ_BIN + os.pathsep) not in (os.pathsep + _search_path + os.pathsep):
    os.environ["PATH"] = (
        (_search_path + os.pathsep + GRAPHVIZ_BIN) if _search_path else GRAPHVIZ_BIN
    )

# Step 1: Read the raw credit data from the CSV file in the working directory
data = pd.read_csv("German Credit Data (1).csv")


# Step 2: Replace every object-typed column with integer category codes;
# columns of any other dtype are passed through unchanged.
def encode_if_object(column):
    """Return category codes for an object-dtype column, else the column as-is."""
    if column.dtype == 'object':
        return column.astype('category').cat.codes
    return column


data = data.apply(encode_if_object)

# Step 3: Separate the target column from the feature columns
target_col = "status"
y = data[target_col]
X = data.drop(columns=[target_col])

# Step 4: Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 5: Train Decision Tree with Hyperparameter Tuning
# Search both split criteria and tree depths 2..10, scoring each candidate by
# ROC AUC over 10 cross-validation folds of the training data.
# Plain Python ints (instead of np.arange) keep best_params_ printing as e.g.
# 'max_depth': 3 rather than a NumPy scalar repr.
param_grid = {'criterion': ['gini', 'entropy'], 'max_depth': list(range(2, 11))}

# The tree gets a fixed seed: scikit-learn permutes features at every split,
# so without random_state ties between equally good splits are broken randomly
# and the selected model / reported metrics can change from run to run.
grid_search = GridSearchCV(
    DecisionTreeClassifier(random_state=42),
    param_grid,
    scoring='roc_auc',
    cv=10,
)
grid_search.fit(X_train, y_train)

# Step 6: Extract the best model
# best_estimator_ is the winning configuration as exposed by GridSearchCV
# after fitting (refit on the training data by default).
best_tree = grid_search.best_estimator_
print("Best parameters for the Decision Tree:", grid_search.best_params_)

# Step 7: Score the tuned tree on the held-out test set
# Hard class predictions feed the classification report.
y_pred = best_tree.predict(X_test)

# Column 1 of predict_proba is used as the positive-class score for ROC AUC
# (scikit-learn orders the columns like best_tree.classes_).
class_probabilities = best_tree.predict_proba(X_test)
y_prob = class_probabilities[:, 1]

test_auc = roc_auc_score(y_test, y_prob)
print("ROC AUC Score:", test_auc)

performance_report = classification_report(y_test, y_pred)
print("Model Performance Summary:\n", performance_report)

# Step 8: Draw the fitted tree with Graphviz
# Class labels are passed as strings, in the order of best_tree.classes_.
class_labels = list(map(str, best_tree.classes_))
dot_data = export_graphviz(
    best_tree,
    filled=True,
    rounded=True,
    feature_names=X.columns,
    class_names=class_labels,
)
graph = graphviz.Source(dot_data)

# Show the rendered tree inline (display is supplied by the Jupyter session)
display(graph)

# Write the rendering to disk under the base name "decision_tree"
graph.render("decision_tree")
print("Decision Tree saved as 'decision_tree.pdf'. Open it to view.")

# Step 9: Dump the same tree as indented plain-text decision rules
column_names = X.columns.tolist()
tree_rules = export_text(best_tree, feature_names=column_names)
print("Decision Tree Rules:\n", tree_rules)


Best parameters for the Decision Tree: {'criterion': 'gini', 'max_depth': 3}
ROC AUC Score: 0.7630123812958289
Model Performance Summary:
               precision    recall  f1-score   support

           0       0.80      0.89      0.84       141
           1       0.64      0.47      0.54        59

    accuracy                           0.77       200
   macro avg       0.72      0.68      0.69       200
weighted avg       0.75      0.77      0.75       200

