In [45]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [47]:

file_path = 'D:\\loan_approval_dataset.csv'
loan_data = pd.read_csv(file_path)



In [91]:
loan_data.head()

Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
0,1,2,0,0,9600000,29900000,12,778,2400000,17600000,22700000,8000000,0
1,2,0,1,1,4100000,12200000,8,417,2700000,2200000,8800000,3300000,1
2,3,3,0,0,9100000,29700000,20,506,7100000,4500000,33300000,12800000,1
3,4,3,0,0,8200000,30700000,8,467,18200000,3300000,23300000,7900000,1
4,5,5,1,1,9800000,24200000,20,382,12400000,8200000,29400000,5000000,1


In [49]:

loan_data.columns = loan_data.columns.str.strip()


label_encoder = LabelEncoder()
loan_data['education'] = label_encoder.fit_transform(loan_data['education'])
loan_data['self_employed'] = label_encoder.fit_transform(loan_data['self_employed'])
loan_data['loan_status'] = label_encoder.fit_transform(loan_data['loan_status'])  # Target variable (0 = Rejected, 1 = Approved)


In [51]:

X = loan_data.drop('loan_status', axis=1)  # Features
y = loan_data['loan_status']  # Target



In [53]:
X

Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value
0,1,2,0,0,9600000,29900000,12,778,2400000,17600000,22700000,8000000
1,2,0,1,1,4100000,12200000,8,417,2700000,2200000,8800000,3300000
2,3,3,0,0,9100000,29700000,20,506,7100000,4500000,33300000,12800000
3,4,3,0,0,8200000,30700000,8,467,18200000,3300000,23300000,7900000
4,5,5,1,1,9800000,24200000,20,382,12400000,8200000,29400000,5000000
...,...,...,...,...,...,...,...,...,...,...,...,...
4264,4265,5,0,1,1000000,2300000,12,317,2800000,500000,3300000,800000
4265,4266,0,1,1,3300000,11300000,20,559,4200000,2900000,11000000,1900000
4266,4267,2,1,0,6500000,23900000,18,457,1200000,12400000,18100000,7300000
4267,4268,1,1,0,4100000,12800000,8,780,8200000,700000,14100000,5800000


In [55]:
y

0       0
1       1
2       1
3       1
4       1
       ..
4264    1
4265    0
4266    1
4267    0
4268    0
Name: loan_status, Length: 4269, dtype: int32

In [57]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [59]:
# Scale the data (feature scaling for numerical stability)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



In [61]:
X_train 

array([[-0.38682838,  1.51250774,  1.00263891, ...,  2.04678575,
         0.07808278,  1.16041374],
       [-0.80430304, -1.43500078,  1.00263891, ...,  1.22311091,
         2.49843196,  0.88201987],
       [-1.59840356, -0.84549907, -0.99736803, ..., -0.8818359 ,
        -1.33923881, -1.31419838],
       ...,
       [ 0.7708264 ,  0.92300603,  1.00263891, ...,  1.29175048,
         1.47359943,  0.13963624],
       [ 1.32636998, -0.25599737,  1.00263891, ..., -0.83607619,
         0.50327926,  1.4388076 ],
       [-1.0526637 ,  0.92300603, -0.99736803, ..., -0.28695963,
         1.03750048, -0.10782497]])

In [63]:
X_test

array([[-0.36395305,  1.51250774, -0.99736803, ...,  1.15447134,
         0.05627784,  0.72735662],
       [-0.79695025, -0.25599737, -0.99736803, ...,  1.04007205,
         0.28522978,  0.54176071],
       [-1.50363436,  0.33350433, -0.99736803, ...,  2.57302245,
         1.45179448,  0.51082806],
       ...,
       [-0.54368774,  1.51250774,  1.00263891, ...,  0.49095549,
        -0.37982112,  0.88201987],
       [-1.20952306,  0.92300603,  1.00263891, ..., -0.83607619,
        -1.0121646 , -1.09766982],
       [-0.84515183, -0.84549907,  1.00263891, ...,  0.14775765,
        -0.59787059, -0.97393921]])

In [65]:

model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)



In [67]:

y_pred = model.predict(X_test)




In [69]:
y_pred

array([1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
       1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,
       0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1,
       0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1,

In [25]:

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)



In [27]:

print(f"Accuracy: {accuracy * 100:.2f}%")
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

Accuracy: 97.78%
Confusion Matrix:
 [[529   7]
 [ 12 306]]
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.99      0.98       536
           1       0.98      0.96      0.97       318

    accuracy                           0.98       854
   macro avg       0.98      0.97      0.98       854
weighted avg       0.98      0.98      0.98       854



In [97]:
# Manually create an example with feature values including loan_id for reference
example_data = {
    'loan_id': [6],  # New loan ID for reference
    'no_of_dependents': [3],  # e.g., 3 dependents
    'education': [0],  # Graduate (0)
    'self_employed': [0],  # No (0)
    'income_annum': [9100000],  # Annual income of 9,100,000
    'loan_amount': [29700000],  # Loan amount 29,700,000
    'loan_term': [20],  # Loan term 20 months
    'cibil_score': [506],  # CIBIL score 506
    'residential_assets_value': [7100000],  # Residential assets value
    'commercial_assets_value': [4500000],  # Commercial assets value
    'luxury_assets_value': [33300000],  # Luxury assets value
    'bank_asset_value': [12800000]  # Bank asset value
}



In [99]:

example_df = pd.DataFrame(example_data)




In [101]:
example_df

Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value
0,6,3,0,0,9100000,29700000,20,506,7100000,4500000,33300000,12800000


In [135]:

example_features = example_df



In [137]:
example_features

Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value
0,6,3,0,0,9100000,29700000,20,506,7100000,4500000,33300000,12800000


In [139]:

example_scaled = scaler.transform(example_features)


In [141]:

prediction = model.predict(example_scaled)



In [143]:

predicted_label = 'Approved' if prediction[0] == 1 else 'Rejected'



In [145]:

print(f"Loan ID: {example_df['loan_id'][0]}, Prediction: {predicted_label}")

Loan ID: 6, Prediction: Approved
