In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input


# Load and Preprocess the Data

In [72]:
# Read the training CSV into a DataFrame named `dataset`
dataset = pd.read_csv(filepath_or_buffer='/content/train.csv')

In [73]:
# Show the loaded DataFrame as this cell's output
dataset

Unnamed: 0,id,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length,loan_status
0,0,37,35000,RENT,0.0,EDUCATION,B,6000,11.49,0.17,N,14,0
1,1,22,56000,OWN,6.0,MEDICAL,C,4000,13.35,0.07,N,2,0
2,2,29,28800,OWN,8.0,PERSONAL,A,6000,8.90,0.21,N,10,0
3,3,30,70000,RENT,14.0,VENTURE,B,12000,11.11,0.17,N,5,0
4,4,22,60000,RENT,2.0,MEDICAL,A,6000,6.92,0.10,N,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
58640,58640,34,120000,MORTGAGE,5.0,EDUCATION,D,25000,15.95,0.21,Y,10,0
58641,58641,28,28800,RENT,0.0,MEDICAL,C,10000,12.73,0.35,N,8,1
58642,58642,23,44000,RENT,7.0,EDUCATION,D,6800,16.00,0.15,N,2,1
58643,58643,22,30000,RENT,2.0,EDUCATION,A,5000,8.90,0.17,N,3,0


In [74]:
# Remove the 'id' column so it is not carried into the feature matrix built below
dataset = dataset.drop(columns=['id'])

In [75]:
# Split the frame into label vector and feature matrix.
# 'loan_status' is taken as the target; every other remaining column is a feature.
y = dataset['loan_status']
x = dataset.drop(columns=['loan_status'])

In [76]:
# Show the feature DataFrame (everything except 'loan_status')
x

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,37,35000,RENT,0.0,EDUCATION,B,6000,11.49,0.17,N,14
1,22,56000,OWN,6.0,MEDICAL,C,4000,13.35,0.07,N,2
2,29,28800,OWN,8.0,PERSONAL,A,6000,8.90,0.21,N,10
3,30,70000,RENT,14.0,VENTURE,B,12000,11.11,0.17,N,5
4,22,60000,RENT,2.0,MEDICAL,A,6000,6.92,0.10,N,3
...,...,...,...,...,...,...,...,...,...,...,...
58640,34,120000,MORTGAGE,5.0,EDUCATION,D,25000,15.95,0.21,Y,10
58641,28,28800,RENT,0.0,MEDICAL,C,10000,12.73,0.35,N,8
58642,23,44000,RENT,7.0,EDUCATION,D,6800,16.00,0.15,N,2
58643,22,30000,RENT,2.0,EDUCATION,A,5000,8.90,0.17,N,3


In [63]:
# Show the target Series ('loan_status')
y

Unnamed: 0,loan_status
0,0
1,0
2,0
3,0
4,0
...,...
58640,0
58641,1
58642,1
58643,0


In [77]:
# Encode categorical variables.
#   - person_home_ownership, loan_intent, loan_grade -> one-hot columns
#   - cb_person_default_on_file                      -> a single ordinal-coded column
#   - all remaining columns are passed through unchanged (remainder='passthrough')
# handle_unknown='ignore' makes the one-hot step emit an all-zero group for a
# category that was not present at fit time instead of raising, so the pickled
# encoder keeps working on new data. Output for the fitted data is unchanged.
ct = ColumnTransformer(transformers=[
    ('oneHotEncoder', OneHotEncoder(handle_unknown='ignore'),
     ['person_home_ownership', 'loan_intent', 'loan_grade']),
    ('ordinalEncoder', OrdinalEncoder(), ['cb_person_default_on_file'])
], remainder='passthrough')

# NOTE: `x` is rebound from the feature DataFrame to the encoded matrix here, so
# the feature/target cell must be re-run before this cell is executed again.
x = ct.fit_transform(x)

In [109]:
import joblib

In [110]:
# Persist the fitted ColumnTransformer so the same encoding can be reloaded later
joblib.dump(value=ct, filename='Loan Approval Prediction_ANN_encoder.pkl')

['Loan Approval Prediction_ANN_encoder.pkl']

In [79]:
# Show `x` again: after the encoding cell it holds the transformer's output, not the DataFrame
x

array([[ 0.  ,  0.  ,  0.  , ..., 11.49,  0.17, 14.  ],
       [ 0.  ,  0.  ,  1.  , ..., 13.35,  0.07,  2.  ],
       [ 0.  ,  0.  ,  1.  , ...,  8.9 ,  0.21, 10.  ],
       ...,
       [ 0.  ,  0.  ,  0.  , ..., 16.  ,  0.15,  2.  ],
       [ 0.  ,  0.  ,  0.  , ...,  8.9 ,  0.17,  3.  ],
       [ 1.  ,  0.  ,  0.  , ..., 11.11,  0.2 ,  5.  ]])

In [80]:
# Hold out 20% of the rows as a test split; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)


In [81]:

# Standardize the features.
# The scaler learns its statistics from the training split only, and the same
# fitted transform is then applied to both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [111]:
# Persist the fitted StandardScaler for reuse at inference time.
# The object written here must be `scaler`; the ColumnTransformer `ct` is saved
# separately under the *_encoder.pkl filename.
joblib.dump(scaler, 'Loan Approval Prediction_ANN_scaler.pkl')

['Loan Approval Prediction_ANN_scaler.pkl']

# Build the ANN **Model**

In [17]:
# Initialize the ANN as an empty Sequential model; layers are appended by the model.add(...) cells below
model = Sequential()

In [18]:
# Add input layer and first hidden layer.
# The input width (number of columns in X_train) is declared with an explicit
# Input object; passing `input_dim` to Dense inside a Sequential model makes
# Keras emit a UserWarning recommending Input(shape) as the first layer.
model.add(Input(shape=(X_train.shape[1],)))
# First hidden layer: 16 units with ReLU activation
model.add(Dense(units=16, activation='relu'))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [19]:
# Second hidden layer: 8 units, ReLU activation
model.add(Dense(8, activation='relu'))


In [20]:
# Output layer: a single sigmoid unit
model.add(Dense(1, activation='sigmoid'))


In [22]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy reported as a metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train the **Model**

In [23]:
# Train the model: 50 epochs, batches of 10 rows, with 20% of the training data
# set aside for per-epoch validation (validation_split=0.2).
# The call's return value is left as the last expression, so it is shown as the cell output.
# NOTE(review): this cell's recorded execution count (23) is lower than those of the
# preprocessing cells (72-81), so the fitted model may predate the current X_train —
# confirm with Restart & Run All.
model.fit(X_train, y_train, epochs=50, batch_size=10, validation_split=0.2)

Epoch 1/50
[1m3754/3754[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - accuracy: 0.8931 - loss: 0.3079 - val_accuracy: 0.9267 - val_loss: 0.2189
Epoch 2/50
[1m3754/3754[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - accuracy: 0.9357 - loss: 0.1988 - val_accuracy: 0.9300 - val_loss: 0.2099
Epoch 3/50
[1m3754/3754[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - accuracy: 0.9380 - loss: 0.1938 - val_accuracy: 0.9376 - val_loss: 0.2007
Epoch 4/50
[1m3754/3754[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 4ms/step - accuracy: 0.9420 - loss: 0.1859 - val_accuracy: 0.9373 - val_loss: 0.1968
Epoch 5/50
[1m3754/3754[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 0.9433 - loss: 0.1837 - val_accuracy: 0.9388 - val_loss: 0.1970
Epoch 6/50
[1m3754/3754[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - accuracy: 0.9420 - loss: 0.1846 - val_accuracy: 0.9381 - val_loss: 0.1942
Epoch 7/50


<keras.src.callbacks.history.History at 0x7df55f68bdf0>

# Evaluate the Model

In [82]:
# Metric helpers used to score the hold-out split
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Model outputs for the hold-out split, thresholded at 0.5 into 0/1 labels
y_pred = model.predict(X_test)
y_pred_binary = (y_pred > 0.5).astype(int)

# Score the thresholded predictions against the true labels
accuracy = accuracy_score(y_test, y_pred_binary)
precision = precision_score(y_test, y_pred_binary)
recall = recall_score(y_test, y_pred_binary)

# Report the three metrics, one per line
print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    sep="\n",
)

[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Accuracy: 0.9481626737147242
Precision: 0.9129392971246006
Recall: 0.6961023142509135


In [83]:
# Show the thresholded (0/1) predictions for the hold-out split
y_pred_binary

array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [0]])

In [84]:
# Show the true labels of the hold-out split for comparison
y_test

Unnamed: 0,loan_status
4657,0
56440,0
42117,1
10849,0
39395,0
...,...
12934,0
39660,0
28657,0
24735,0


# **Save the Model**

In [113]:
# Write the trained model to disk under this filename.
# NOTE(review): the '.h5' extension selects the legacy HDF5 format; newer Keras
# releases recommend the '.keras' format — confirm what downstream loaders expect
# before renaming the file.
model.save('loan_approval_model_ann.h5')



In [29]:
# Read the sample submission file (columns: id, loan_status)
submission_data = pd.read_csv(filepath_or_buffer='/content/sample_submission.csv')

In [101]:
# Number of distinct values in the sample submission's 'loan_status' column
submission_data['loan_status'].nunique()

1

In [33]:
# (rows, columns) of the sample submission
submission_data.shape

(39098, 2)

In [35]:
# Number of rows in the hold-out label vector
len(y_test)

11729

# Predict on the Provided Test Data

In [85]:
# Read the test CSV into a DataFrame
test = pd.read_csv(filepath_or_buffer='/content/test.csv')

In [44]:
# Count how many columns of the test frame contain at least one missing value
test.isnull().any().sum()

0

In [45]:
# Preview the first rows of the test frame
test.head()

Unnamed: 0,id,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,58645,23,69000,RENT,3.0,HOMEIMPROVEMENT,F,25000,15.76,0.36,N,2
1,58646,26,96000,MORTGAGE,6.0,PERSONAL,C,10000,12.68,0.1,Y,4
2,58647,26,30000,RENT,5.0,VENTURE,E,4000,17.19,0.13,Y,2
3,58648,33,50000,RENT,4.0,DEBTCONSOLIDATION,A,7000,8.9,0.14,N,7
4,58649,26,102000,MORTGAGE,8.0,HOMEIMPROVEMENT,D,15000,16.32,0.15,Y,4


In [86]:
# Remove the 'id' column so the test frame has the same feature columns the encoder was fitted on
test = test.drop(columns=['id'])

In [87]:
# Show the test frame after 'id' has been dropped
test

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,23,69000,RENT,3.0,HOMEIMPROVEMENT,F,25000,15.76,0.36,N,2
1,26,96000,MORTGAGE,6.0,PERSONAL,C,10000,12.68,0.10,Y,4
2,26,30000,RENT,5.0,VENTURE,E,4000,17.19,0.13,Y,2
3,33,50000,RENT,4.0,DEBTCONSOLIDATION,A,7000,8.90,0.14,N,7
4,26,102000,MORTGAGE,8.0,HOMEIMPROVEMENT,D,15000,16.32,0.15,Y,4
...,...,...,...,...,...,...,...,...,...,...,...
39093,22,31200,MORTGAGE,2.0,DEBTCONSOLIDATION,B,3000,10.37,0.10,N,4
39094,22,48000,MORTGAGE,6.0,EDUCATION,A,7000,6.03,0.15,N,3
39095,51,60000,MORTGAGE,0.0,PERSONAL,A,15000,7.51,0.25,N,25
39096,22,36000,MORTGAGE,4.0,PERSONAL,D,14000,15.62,0.39,Y,4


## Encode categorical **variables** for test data

In [88]:
# Apply the already-fitted ColumnTransformer to the test features (transform only, no refit).
# NOTE(review): `test` is rebound from the DataFrame to the transformer's output, so this
# cell should not be run twice in a row — reload test.csv and drop 'id' first.
test = ct.transform(test)

In [89]:
# Show the encoded test matrix
test

array([[ 0.  ,  0.  ,  0.  , ..., 15.76,  0.36,  2.  ],
       [ 1.  ,  0.  ,  0.  , ..., 12.68,  0.1 ,  4.  ],
       [ 0.  ,  0.  ,  0.  , ..., 17.19,  0.13,  2.  ],
       ...,
       [ 1.  ,  0.  ,  0.  , ...,  7.51,  0.25, 25.  ],
       [ 1.  ,  0.  ,  0.  , ..., 15.62,  0.39,  4.  ],
       [ 0.  ,  0.  ,  0.  , ...,  9.91,  0.44,  9.  ]])

## Standardize the test data

In [90]:

# Scale the encoded test matrix with the scaler that was fitted on X_train (transform only).
# NOTE(review): `test` is overwritten with its scaled version; running this cell a second
# time would scale already-scaled values without raising — re-run from the test.csv load instead.
test = scaler.transform(test)

In [None]:
# Predict on the test set.
# The raw model outputs are kept in `test_pred` and printed as-is; no 0.5 threshold
# is applied in this cell.
test_pred = model.predict(test)
print(test_pred)


In [103]:
# Side-by-side frame for eyeballing the predictions:
#   - 'actula_loan_status': the 'loan_status' column read from the sample submission file
#   - 'loan_status': the model outputs in `test_pred`, flattened to one dimension
predictions_df = pd.DataFrame(
    data={
        'actula_loan_status': submission_data['loan_status'],
        'loan_status': test_pred.flatten(),
    },
)

In [104]:
# Show the comparison frame built in the previous cell
predictions_df

Unnamed: 0,actula_loan_status,loan_status
0,0.5,0.998658
1,0.5,0.027018
2,0.5,0.458932
3,0.5,0.034372
4,0.5,0.108041
...,...,...
39093,0.5,0.024415
39094,0.5,0.007123
39095,0.5,0.018138
39096,0.5,0.250365


In [105]:
# Show the sample submission frame (id, loan_status)
submission_data

Unnamed: 0,id,loan_status
0,58645,0.5
1,58646,0.5
2,58647,0.5
3,58648,0.5
4,58649,0.5
...,...,...
39093,97738,0.5
39094,97739,0.5
39095,97740,0.5
39096,97741,0.5


In [106]:
# Assemble the submission frame:
#   - 'id' is taken from the sample submission file
#   - 'loan_status' is the model output in `test_pred`, flattened to one dimension
# NOTE(review): this pairs ids and predictions by row position, so it presumes
# sample_submission.csv and test.csv list rows in the same order — confirm.
submission_df = pd.DataFrame(
    data={
        'id': submission_data['id'],
        'loan_status': test_pred.flatten(),
    },
)

In [107]:
# Show the submission frame before it is written to disk
submission_df

Unnamed: 0,id,loan_status
0,58645,0.998658
1,58646,0.027018
2,58647,0.458932
3,58648,0.034372
4,58649,0.108041
...,...,...
39093,97738,0.024415
39094,97739,0.007123
39095,97740,0.018138
39096,97741,0.250365


In [108]:
# Write the submission frame to 'predictions.csv', omitting the DataFrame index column
submission_df.to_csv(path_or_buf='predictions.csv', index=False)