In [9]:
# Model Development Notebook Content

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

In [10]:
# Load the dataset
df = pd.read_csv('../data/raw/synthetic_debt_data.csv')

# Encode categorical variables
label_encoder = LabelEncoder()
df['Repayment_Strategy'] = label_encoder.fit_transform(df['Repayment_Strategy'])

In [11]:
# Splitting the features and target variable
X = df.drop('Repayment_Strategy', axis=1)
y = df['Repayment_Strategy']

# Split the data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_validate, X_test, y_validate, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [12]:
# Initialize and train the decision tree model
dtree = DecisionTreeClassifier(random_state=42)
dtree.fit(X_train, y_train)

# Predict on the validation set and calculate accuracy
y_pred_validate = dtree.predict(X_validate)
validate_accuracy = accuracy_score(y_validate, y_pred_validate)
print(f"Validation Accuracy: {validate_accuracy:.2f}")

# Examine feature importance
feature_importance = pd.Series(dtree.feature_importances_, index=X.columns).sort_values(ascending=False)
print("Feature Importances:\n", feature_importance)

Validation Accuracy: 0.36
Feature Importances:
 Additional_Payment            0.160981
Initial_Debt                  0.150940
Monthly_Expenses              0.150544
Minimum_Payment_Percentage    0.146714
Interest_Rate                 0.141000
Monthly_Income                0.130601
User_ID                       0.119220
dtype: float64
