                                                *Real-Time Loan Eligibility Prediction*
                                                ---------------------------------------


In [17]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import warnings

warnings.filterwarnings('ignore')

In [23]:
#  Load data
train_Data = pd.read_csv("train_loan_eligibility_cleaned.csv")
test_Data = pd.read_csv("test_loan_eligibility_cleaned.csv")
train_Data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001535,Male,No,0.0,Graduate,No,3254,0.0,50.0,360.0,1.0,Urban,Y
1,LP001792,Male,Yes,1.0,Graduate,No,3315,0.0,96.0,360.0,1.0,Semiurban,Y
2,LP002443,Male,Yes,2.0,Graduate,No,3340,1710.0,150.0,360.0,0.0,Rural,N
3,LP002517,Male,Yes,1.0,Not Graduate,No,2653,1500.0,113.0,180.0,0.0,Rural,N
4,LP001894,Male,Yes,0.0,Graduate,No,2620,2223.0,150.0,360.0,1.0,Semiurban,Y


In [27]:
test_Data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
0,LP001528,Male,No,0,Graduate,No,6277,0.0,118.0,360.0,0.0,Rural
1,LP002804,Male,Yes,0,Graduate,No,4180,2306.0,182.0,360.0,1.0,Semiurban
2,LP002945,Male,Yes,0,Graduate,Yes,9963,0.0,180.0,360.0,1.0,Rural
3,LP001784,Male,Yes,1,Graduate,No,5500,1260.0,170.0,360.0,1.0,Rural
4,LP001514,Male,Yes,0,Graduate,No,2330,4486.0,100.0,360.0,1.0,Semiurban


# Combine datasets

In [30]:
train_Data["source"] = "train"
test_Data["source"] = "test"
test_Data["Loan_Status"] = np.nan # placeholder
data = pd.concat([train_Data, test_Data], ignore_index=True)

# Handle missing values

In [35]:
for col in ["Gender", "Married", "Dependents", "Self_Employed", "Credit_History"]:
    data[col].fillna(data[col].mode()[0], inplace=True)

for col in ["LoanAmount", "Loan_Amount_Term"]:
    data[col].fillna(data[col].median(), inplace=True)

# Feature Engineering

In [40]:
data["Dependents"] = data["Dependents"].replace("3+", 3).astype(int)

# Label encode categorical features


In [45]:
cat_cols = ["Gender", "Married", "Education", "Self_Employed", "Property_Area", "Loan_Status"]
le = LabelEncoder()
for col in cat_cols:
    data[col] = le.fit_transform(data[col].astype(str))

In [47]:
data[col]

0      1
1      1
2      0
3      0
4      1
      ..
609    2
610    2
611    2
612    2
613    2
Name: Loan_Status, Length: 614, dtype: int32

# Split back into train/test

In [50]:
train_df = data[data["source"] == "train"].drop("source", axis=1)
test_df = data[data["source"] == "test"].drop(["source", "Loan_Status"], axis=1)

In [52]:
X_train = train_df.drop(["Loan_ID", "Loan_Status"], axis=1)
y_train = train_df["Loan_Status"]
X_test = test_df.drop("Loan_ID", axis=1)

# Train model

In [55]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on training set

In [60]:
y_train_pred = model.predict(X_train)
accuracy = accuracy_score(y_train, y_train_pred)
print(f" Training Accuracy: {accuracy:.4f}")

 Training Accuracy: 1.0000


# Predict on test data

In [63]:
test_pred = model.predict(X_test)

In [65]:
test_pred

array([0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0])

# Save predictions to submission file

In [92]:
submission = pd.DataFrame({
"Loan_ID": test_df["Loan_ID"],
"Loan_Status": np.where(test_pred == 1, "Y", "N")
})
submission.to_csv("submission.csv", index=False)
print("submission.csv file generated successfully!")

submission.csv file generated successfully!


In [82]:
print(submission)

      Loan_ID Loan_Status
491  LP001528           N
492  LP002804           Y
493  LP002945           N
494  LP001784           Y
495  LP001514           Y
..        ...         ...
609  LP002178           Y
610  LP001910           N
611  LP001014           N
612  LP001935           Y
613  LP002205           N

[123 rows x 2 columns]


In [90]:
submission.to_csv("submission.csv", index=False)
print("submission.csv file generated successfully!")

submission.csv file generated successfully!
