In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

# Remove BELNR before modelling: per the original author's note it is the
# accounting document number, i.e. a row identifier rather than a feature.
df = df.drop(labels='BELNR', axis='columns')

# Column roles used by the rest of this cell.
# `numerical_cols` is kept for reference; it is not read by the visible code.
target_col = 'label'
numerical_cols = ['DMBTR', 'WRBTR']
categorical_cols = ['WAERS', 'BUKRS', 'KTOSL', 'PRCTR', 'BSCHL', 'HKONT']

# Replace every categorical column with integer codes, because scikit-learn's
# decision trees only accept numeric feature values.
# NOTE: LabelEncoder numbers the categories in sorted order, so the resulting
# integers carry an arbitrary ordering that the tree will split on.
# One fitted encoder per column is kept in `label_encoders` so the integer
# codes can be translated back to the original categories later.
label_encoders = {col: LabelEncoder() for col in categorical_cols}
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])  # overwrite the column in place

# Convert the target variable to binary (0 = regular, 1 = anomal), but only if
# it is not numeric already. Calling .map() on labels that are already 0/1
# (for example after this code has run once on the same `df`) would turn every
# value into NaN, because none of them matches a key of the mapping.
# Numeric targets are passed through unchanged and are not validated here.
label_mapping = {'regular': 0, 'anomal': 1}
if not pd.api.types.is_numeric_dtype(df[target_col]):
    encoded_target = df[target_col].map(label_mapping)
    unmapped = encoded_target.isna()
    if unmapped.any():
        # Fail loudly instead of passing NaN targets on to the classifier.
        unknown_labels = df.loc[unmapped, target_col].unique().tolist()
        raise ValueError(
            f"Unexpected values in '{target_col}': {unknown_labels}; "
            f"expected one of {list(label_mapping)}"
        )
    df[target_col] = encoded_target.astype('int64')

# Define features and target
X = df.drop(columns=[target_col])  # Features: every remaining column except the target
y = df[target_col]  # Target variable (0 = regular, 1 = anomal)

# Split data into training (80%) and testing (20%) sets.
# stratify=y keeps the regular/anomal ratio identical in both partitions, so a
# rare class cannot end up under-represented (or absent) in the test set by
# chance. NOTE(review): presumably 'anomal' rows are a small minority - confirm.
# Stratification needs at least two rows of each class.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed for a reproducible split
    stratify=y,
)

# Train Decision Tree model; the depth cap keeps the extracted rules and the
# plotted tree small enough to read.
tree_clf = DecisionTreeClassifier(random_state=42, max_depth=5)
tree_clf.fit(X_train, y_train)

# Score the fitted tree on the held-out rows and print the per-class
# precision / recall / F1 summary under a heading.
y_pred = tree_clf.predict(X_test)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")

# Render the fitted tree as indented if/else text rules, labelled with the
# feature column names, and print them under a heading.
# Thresholds on encoded categorical columns refer to the integer codes.
tree_rules = export_text(tree_clf, feature_names=list(X.columns))
print("\nDecision Tree Rules:", tree_rules, sep="\n")

# Draw the fitted tree on a 15x10 inch figure.
plt.figure(figsize=(15, 10))
plot_tree(
    tree_clf,
    feature_names=list(X.columns),
    # Listed in ascending class order: 0 -> Regular, 1 -> Anomal.
    class_names=['Regular', 'Anomal'],
    filled=True,  # colour each node by its majority class
    fontsize=10,
)
plt.show()
