In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Read the raw loan data and discard Loan_ID in a single expression;
# Loan_ID is not useful as a model feature.
df = pd.read_csv("loan_prediction.csv").drop(columns=["Loan_ID"])

In [None]:
# Separate numerical and categorical feature columns.
# The target is dropped *before* the dtype selection so it can never end up in
# either list, whatever its dtype. The previous list.remove() call raised
# ValueError whenever Loan_Status was not an object column, and a numeric
# Loan_Status would have been picked up as a numeric feature.
feature_df = df.drop(columns=["Loan_Status"])
num_cols = feature_df.select_dtypes(include=['int64', 'float64']).columns.tolist()
cat_cols = feature_df.select_dtypes(include=['object']).columns.tolist()

In [14]:

# Handle missing values
# Work on a copy so the raw `df` is never mutated. Re-running this cell then
# re-fits the encoders on the original string labels instead of on the integer
# codes a previous run would have written back into `df`.
# Rows without a Loan_Status are dropped rather than imputed: filling in the
# target would invent labels for the model to learn from and be scored on.
df_model = df.dropna(subset=["Loan_Status"]).copy()

# NOTE(review): both imputers are fitted on the full dataset before the
# train/test split, so test rows influence the fill values. Consider fitting
# them on the training split only (e.g. inside a sklearn Pipeline).
num_imputer = SimpleImputer(strategy='median')
df_model[num_cols] = num_imputer.fit_transform(df_model[num_cols])

# Fitted on the categorical *feature* columns only (not the target), so the
# fitted imputer describes exactly the columns available for unlabeled data.
cat_imputer = SimpleImputer(strategy='most_frequent')
df_model[cat_cols] = cat_imputer.fit_transform(df_model[cat_cols])

# Label encode categorical columns (features and target); every fitted encoder
# is kept so raw values can be encoded / decoded later.
label_encoders = {}
for col in cat_cols + ['Loan_Status']:
    le = LabelEncoder()
    df_model[col] = le.fit_transform(df_model[col])
    label_encoders[col] = le

# Split features and target
X = df_model.drop(columns="Loan_Status")
y = df_model["Loan_Status"]

# Split data (80/20 hold-out, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = model.predict(X_test)
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\n📊 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\n📝 Classification Report:\n", classification_report(y_test, y_pred))

# ------------------------------------
# ✅ Prediction function for new data
# ------------------------------------
def predict_loan_status(applicant_data):
    """Print whether the trained model approves a single loan application.

    Parameters
    ----------
    applicant_data : dict
        Maps feature column names to raw (un-encoded) values, e.g.
        ``{'Gender': 'Male', 'ApplicantIncome': 5000, ...}``. Key order does
        not matter. A feature that is absent, ``None`` or NaN is treated as
        missing and filled with the value learned by the fitted imputers.

    Raises
    ------
    ValueError
        If ``applicant_data`` contains a key that is not a model feature, or a
        categorical value the corresponding encoder has never seen.

    Notes
    -----
    Relies on the notebook globals ``num_cols``, ``cat_cols``, ``num_imputer``,
    ``cat_imputer``, ``label_encoders`` and ``model``. The decision is printed;
    the function returns ``None``.
    """
    # The fitted model is the source of truth for which columns it expects and
    # in which order.
    feature_order = list(model.feature_names_in_)

    # Reject misspelled / unexpected keys instead of silently imputing the
    # feature they were meant to provide.
    unknown_keys = sorted(set(applicant_data) - set(feature_order), key=str)
    if unknown_keys:
        raise ValueError(
            f"Unknown feature(s) {unknown_keys}; expected a subset of {feature_order}"
        )

    # reindex() puts the columns in training order and adds any absent feature
    # as NaN so it is imputed below.
    input_df = pd.DataFrame([applicant_data]).reindex(columns=feature_order)

    # Impute missing numeric values (cast first so None becomes NaN)
    input_df[num_cols] = num_imputer.transform(input_df[num_cols].astype(float))

    # Most-frequent value per column as learned by cat_imputer. Looked up by
    # column name, so it does not matter which extra columns the imputer was
    # fitted together with.
    cat_fill = dict(zip(cat_imputer.feature_names_in_, cat_imputer.statistics_))

    # Impute, validate and label encode categorical columns
    for col in cat_cols:
        encoder = label_encoders[col]
        values = input_df[col].astype(object).fillna(cat_fill[col])
        unseen = set(values) - set(encoder.classes_)
        if unseen:
            raise ValueError(
                f"Unknown value(s) {sorted(unseen, key=str)} for '{col}'; "
                f"expected one of {list(encoder.classes_)}"
            )
        input_df[col] = encoder.transform(values)

    # Predict and map the encoded class back to its original label
    pred = model.predict(input_df)[0]
    decision = label_encoders["Loan_Status"].inverse_transform([pred])[0]

    if decision == 'Y':
        print("✅ Loan Approved")
    else:
        print("❌ Loan Rejected")


✅ Accuracy: 0.7560975609756098

📊 Confusion Matrix:
 [[18 25]
 [ 5 75]]

📝 Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.42      0.55        43
           1       0.75      0.94      0.83        80

    accuracy                           0.76       123
   macro avg       0.77      0.68      0.69       123
weighted avg       0.76      0.76      0.73       123



In [13]:
# ------------------------------------
# 🔍 Make a sample prediction
# ------------------------------------

In [10]:
# Sample applicant 1: married male graduate, not self-employed, with a
# co-applicant income and Credit_History of 1.0.
# Keys are listed in the same order as the dataset's feature columns.
predict_loan_status(
    applicant_data={
        "Gender": "Male",
        "Married": "Yes",
        "Dependents": "1",
        "Education": "Graduate",
        "Self_Employed": "No",
        "ApplicantIncome": 5000,
        "CoapplicantIncome": 2000.0,
        "LoanAmount": 150.0,
        "Loan_Amount_Term": 360.0,
        "Credit_History": 1.0,
        "Property_Area": "Urban",
    }
)

✅ Loan Approved


In [11]:
# Sample applicant 2: unmarried, self-employed female non-graduate with a low
# income, no co-applicant income, a larger loan amount and Credit_History of 0.0.
# Keys are listed in the same order as the dataset's feature columns.
predict_loan_status(
    applicant_data={
        "Gender": "Female",
        "Married": "No",
        "Dependents": "0",
        "Education": "Not Graduate",
        "Self_Employed": "Yes",
        "ApplicantIncome": 1000,
        "CoapplicantIncome": 0.0,
        "LoanAmount": 250.0,
        "Loan_Amount_Term": 360.0,
        "Credit_History": 0.0,
        "Property_Area": "Rural",
    }
)



❌ Loan Rejected
