In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Load dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/credit-screening/crx.data"
column_names = [
    "A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10",
    "A11", "A12", "A13", "A14", "A15", "Loan_Status"
]
data = pd.read_csv(url, header=None, names=column_names)

# Preprocessing
data.replace('?', pd.NA, inplace=True)
data.dropna(inplace=True)
label_encoder = LabelEncoder()
data['Loan_Status'] = label_encoder.fit_transform(data['Loan_Status'])
X = data.drop('Loan_Status', axis=1)
y = data['Loan_Status']

# Separate categorical and numeric features
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
numeric_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

# Apply one-hot encoding to categorical features
preprocessor = ColumnTransformer(
    transformers=[('cat', OneHotEncoder(), categorical_cols)],
    remainder='passthrough'
)
X = preprocessor.fit_transform(X)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)

# Display results
print("Loan Default Prediction using Random Forest Classifier")
print("-" * 50)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(report)

Loan Default Prediction using Random Forest Classifier
--------------------------------------------------
Accuracy: 0.85
Classification Report:
              precision    recall  f1-score   support

           +       0.81      0.84      0.82        55
           -       0.88      0.86      0.87        76

    accuracy                           0.85       131
   macro avg       0.84      0.85      0.84       131
weighted avg       0.85      0.85      0.85       131

