In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [34]:

# Load data
df = pd.read_csv('loan_approval.csv')
df.tail()

Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
4264,4265,5,Graduate,Yes,1000000,2300000,12,317,2800000,500000,3300000,800000,Rejected
4265,4266,0,Not Graduate,Yes,3300000,11300000,20,559,4200000,2900000,11000000,1900000,Approved
4266,4267,2,Not Graduate,No,6500000,23900000,18,457,1200000,12400000,18100000,7300000,Rejected
4267,4268,1,Not Graduate,No,4100000,12800000,8,780,8200000,700000,14100000,5800000,Approved
4268,4269,1,Graduate,No,9200000,29700000,10,607,17800000,11800000,35700000,12000000,Approved


In [15]:
#Strip column names to remove leading/trailing spaces
df.columns = df.columns.str.strip()

# Clean string values
df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)

  df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)


In [16]:
print(df['loan_status'].value_counts())


loan_status
Approved    2656
Rejected    1613
Name: count, dtype: int64


In [17]:
#Convert categorical columns to numeric
le_edu = LabelEncoder()
df['education'] = le_edu.fit_transform(df['education'])

le_self = LabelEncoder()
df['self_employed'] = le_self.fit_transform(df['self_employed'])

#Map loan_status and drop rows with NaN targets
df['loan_status'] = df['loan_status'].str.strip().map({'Approved': 1, 'Rejected': 0})



In [19]:
X = df.drop('loan_status', axis=1)
y = df['loan_status']
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=0)


In [20]:
#  Standardize
scaler = StandardScaler()
xtrain = scaler.fit_transform(xtrain)
xtest = scaler.transform(xtest)

In [28]:
import joblib

joblib.dump(scaler, 'scaler.pkl')


['scaler.pkl']

In [29]:
# Build model
model = Sequential([
    Dense(32, activation='relu', input_shape=(xtrain.shape[1],)),
    Dropout(0.1),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

In [30]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [31]:
#train 
history = model.fit(xtrain, ytrain, epochs=50, batch_size=16, validation_data=(xtest, ytest))


Epoch 1/50
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.6427 - loss: 0.6189 - val_accuracy: 0.9052 - val_loss: 0.3060
Epoch 2/50
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8936 - loss: 0.2985 - val_accuracy: 0.9192 - val_loss: 0.2182
Epoch 3/50
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9076 - loss: 0.2463 - val_accuracy: 0.9297 - val_loss: 0.2039
Epoch 4/50
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9238 - loss: 0.2034 - val_accuracy: 0.9391 - val_loss: 0.1941
Epoch 5/50
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9211 - loss: 0.2097 - val_accuracy: 0.9426 - val_loss: 0.1912
Epoch 6/50
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9385 - loss: 0.1821 - val_accuracy: 0.9368 - val_loss: 0.1828
Epoch 7/50
[1m214/214[0m 

In [32]:
# Evaluate
loss, accuracy = model.evaluate(xtest, ytest)
print(f'Test loss: {loss:.4f}')
print(f'Test accuracy: {accuracy:.4f}')

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9701 - loss: 0.0806 
Test loss: 0.0903
Test accuracy: 0.9696


In [33]:
#save model 
model.save('loan_approval_model.h5')


