In [43]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [44]:
# Step 1: Data Preprocessing
# Load the loan-approval table from the CSV file into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer="../Datasets/ETL_Completed_LoanApprovalPrediction.csv"
)

In [45]:
# Step 2: Splitting the Data
# Features are every column except the target; the target is 'Loan_Status'.
y = data['Loan_Status']
X = data.drop('Loan_Status', axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [46]:
# Step 3: Building the Random Forest Model
# 200 trees, seeded so repeated runs build the same forest.
rf_model = RandomForestClassifier(
    random_state=42,
    n_estimators=200,
)

In [47]:
# Step 4: Training the Model
# Fit the forest on the training split only; the test split stays unseen.
rf_model.fit(X=X_train, y=y_train)

In [48]:
# Step 5: Evaluating the Model
# Predict on the held-out rows and report the overall accuracy.
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Random Forest Model Accuracy: {accuracy:.4f}')

# Per-class precision/recall/F1, then the raw confusion counts.
# sep='\n' puts each heading on its own line above its table.
print('\nClassification Report:', classification_report(y_test, y_pred), sep='\n')
print('\nConfusion Matrix:', confusion_matrix(y_test, y_pred), sep='\n')

Random Forest Model Accuracy: 0.8911

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.64      0.74        25
           1       0.89      0.97      0.93        76

    accuracy                           0.89       101
   macro avg       0.89      0.81      0.84       101
weighted avg       0.89      0.89      0.88       101


Confusion Matrix:
[[16  9]
 [ 2 74]]


In [49]:
# Columns treated as categorical when collecting user input.
categorical_columns = [
    'Gender',
    'Married',
    'Education',
    'Self_Employed',
    'Property_Area',
]

In [60]:
# Step 6: Predicting Loan Eligibility for User Input
#
# rf_model was fitted directly on the columns of X, so every answer must be a
# number in the same encoding as the training data. Fitting a new LabelEncoder
# on the single user row (the previous approach) turns ANY answer into 0,
# because the encoder only ever sees one class; the typed category was lost.
# Instead, categorical answers are validated against the codes that actually
# occur in the training data, and every answer is converted to a number.
print("Please enter the following details for loan eligibility prediction:")
user_data = {}
for col in X.columns:
    is_categorical = col in categorical_columns
    if is_categorical:
        # Codes observed in the training data. Assumes the ETL step already
        # encoded these columns numerically -- TODO confirm the code -> label
        # mapping against the ETL notebook and show labels in the prompt.
        allowed_codes = sorted(X[col].dropna().unique().tolist())
        prompt = f"{col} (encoded value, one of {allowed_codes}): "
    else:
        prompt = f"{col}: "

    user_input = input(prompt).strip()
    try:
        value = float(user_input)
    except ValueError:
        # Fail here with the column name rather than deep inside predict().
        raise ValueError(f"{col}: expected a number, got {user_input!r}") from None
    # float() accepts 'nan' and 'inf'; neither is a meaningful applicant value.
    if pd.isna(value) or abs(value) == float("inf"):
        raise ValueError(f"{col}: expected a finite number, got {user_input!r}")
    if is_categorical and value not in allowed_codes:
        raise ValueError(
            f"{col}: {user_input!r} is not one of the encoded values {allowed_codes}"
        )
    user_data[col] = [value]

# One-row frame with exactly the column order the model was fitted on.
user_df = pd.DataFrame(user_data)[X_train.columns]

user_pred = rf_model.predict(user_df)
if user_pred[0] == 1:
    print("Congratulations! Your loan application is APPROVED.")
else:
    print("We regret to inform you that your loan application is DENIED.")

Please enter the following details for loan eligibility prediction:
We regret to inform you that your loan application is DENIED.
