In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Example dataset (replace with actual bank data).
# Each tuple below is one applicant; fields follow the order in `applicant_columns`.
applicant_columns = ['ApplicantID', 'Credit_Score', 'Income', 'Loan_Amount', 'Loan_Term', 'Risk']
applicant_rows = [
    (1, 750, 50000, 20000, 36, 'low'),
    (2, 680, 60000, 25000, 60, 'low'),
    (3, 590, 30000, 15000, 24, 'high'),
    (4, 610, 40000, 18000, 36, 'high'),
    (5, 720, 55000, 21000, 48, 'low'),
    (6, 680, 62000, 23000, 60, 'low'),
    (7, 700, 58000, 20000, 36, 'low'),
    (8, 580, 32000, 16000, 24, 'high'),
    (9, 610, 41000, 19000, 36, 'high'),
    (10, 740, 53000, 22000, 48, 'low'),
]

# Transpose the row tuples into the column-oriented mapping {column name: list of values}.
data = {
    column: list(values)
    for column, values in zip(applicant_columns, zip(*applicant_rows))
}

# Create a DataFrame from the column mapping
df = pd.DataFrame(data)

# Display the dataset (heading line, then the frame on the following lines)
print("Dataset:", df, sep="\n")

# Define features (X) and target (y).
# Only the four numeric loan attributes are used as features; ApplicantID is
# not part of the feature list.
X = df[['Credit_Score', 'Income', 'Loan_Amount', 'Loan_Term']]
y = df['Risk']

# Split the data into training and testing sets (80% train / 20% test).
# stratify=y keeps the low/high class ratio the same in both splits. With only
# ten rows and a two-row test set, an unstratified split can put a single class
# in the test set, which makes the evaluation metrics uninformative.
# random_state is fixed so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Build the Random Forest classifier and fit it on the training split in one
# step (fit() returns the fitted estimator). random_state is fixed so the
# forest is reproducible across runs.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict the risk label for every row of the held-out test split
y_pred = model.predict(X_test)

# Evaluate the model on the held-out test split.
# The label list comes from the full target column so that the report and the
# confusion matrix always cover every class in a fixed (sorted) order, even if
# the small test split happens to miss one of them. Without it, the matrix
# shape and row order depend on which labels appear in y_test / y_pred.
class_labels = sorted(y.unique())

accuracy = accuracy_score(y_test, y_pred)
# zero_division=0 reports 0 (instead of emitting a warning) for a class that
# has no true or predicted samples in the test split.
report = classification_report(
    y_test, y_pred, labels=class_labels, zero_division=0
)
# Rows are true labels, columns are predicted labels, both ordered as class_labels.
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

print("\nModel Evaluation:")
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)
print("Confusion Matrix:")
print(conf_matrix)

# Example: score a single new applicant.
# The one-row frame uses the same four feature columns, in the same order,
# as the frame the model was fitted on.
new_applicant = pd.DataFrame([
    {'Credit_Score': 710, 'Income': 54000, 'Loan_Amount': 20000, 'Loan_Term': 36},
])
new_prediction = model.predict(new_applicant)

# Blank line + heading, then the predicted label array on the following line
print("\nPredicted Risk for New Applicant:", new_prediction, sep="\n")


Dataset:
   ApplicantID  Credit_Score  Income  Loan_Amount  Loan_Term  Risk
0            1           750   50000        20000         36   low
1            2           680   60000        25000         60   low
2            3           590   30000        15000         24  high
3            4           610   40000        18000         36  high
4            5           720   55000        21000         48   low
5            6           680   62000        23000         60   low
6            7           700   58000        20000         36   low
7            8           580   32000        16000         24  high
8            9           610   41000        19000         36  high
9           10           740   53000        22000         48   low

Model Evaluation:
Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

        high       1.00      1.00      1.00         1
         low       1.00      1.00      1.00         1

    accuracy                      