# Import Libraries


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import export_text, plot_tree

# Generate Synthetic Data


In [None]:
# Seed NumPy's global RNG so any NumPy-based randomness in later cells is repeatable
np.random.seed(42)

# Labels attached, in order, to the generated feature columns
columns = [
    'Income',
    'Credit_Score',
    'Debt_to_Income_Ratio',
    'Loan_Amount',
    'Employment_History',
    'Previous_Loan_Repayment_History',
    'Other_Factor',
]

# Synthesize a two-class dataset: 1000 rows, 5 informative + 2 redundant
# features, class weights of 0.8 / 0.2 and no label flipping (flip_y=0)
X, y = make_classification(
    n_samples=1000,
    n_features=len(columns),
    n_informative=5,
    n_redundant=2,
    n_clusters_per_class=1,
    weights=[0.8, 0.2],
    flip_y=0,
    random_state=42,
)

# Assemble the features and the binary 'Loan_Default' target into one DataFrame
df = pd.DataFrame(X, columns=columns).assign(Loan_Default=y)

# Data Preprocessing


In [None]:
# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
train_df, test_df = train_test_split(df, random_state=42, test_size=0.2)

# Pull the target column apart from the predictors in each partition
y_train, y_test = train_df['Loan_Default'], test_df['Loan_Default']
X_train = train_df.drop(columns='Loan_Default')
X_test = test_df.drop(columns='Loan_Default')

# Create the (still unfitted) decision tree classifier with a fixed seed
dt_model = DecisionTreeClassifier(random_state=42)

# Model Training


In [None]:
# Fit the decision tree on the training features and their labels
dt_model.fit(X=X_train, y=y_train)

# Model Evaluation


In [None]:
# Predict class labels for the held-out test rows
y_pred = dt_model.predict(X_test)

# Score the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Report the scalar and textual metrics; assignments alone display nothing,
# so without these prints the evaluation results would never be shown.
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_rep)

# Visualizations


In [None]:
# Confusion matrix heatmap: rows are the true classes, columns the predicted ones
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Non-Default', 'Default'], yticklabels=['Non-Default', 'Default'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

# Plot the Decision Tree structure graphically.
# feature_names must have one entry per feature the model was trained on, so
# take them from the training frame. Slicing the name list with [:-1] would
# drop the last feature and leave splits on it without a label.
plt.figure(figsize=(12, 8))
plot_tree(dt_model, feature_names=list(X_train.columns), class_names=['Non-Default', 'Default'],
          filled=True, rounded=True, max_depth=3)  # Adjust max_depth as needed
plt.title("Decision Tree Structure")
plt.show()