# ** Neural Networks Are a Good Choice using frameworks like TensorFlow/Keras or PyTorch**

Large and Complex Data: Neural networks excel at capturing complex patterns and nonlinear relationships.

High Dimensionality: If the dataset has many features, neural networks can effectively model intricate interactions.

Enough Data: Neural networks require a substantial amount of data for training. If the dataset is small, traditional models might perform better.

Nonlinearity: Loan interest rates or amounts are often influenced by nonlinear factors (e.g., borrower income, loan intent, previous defaults). Neural networks can model these relationships better than linear models.

In [1]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [4]:
# Step 1: Load and preprocess the data
loan_data = pd.read_csv("/content/loan_data.csv")  # Replace with your dataset path
loan_data.head()

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,female,Master,71948.0,0,RENT,35000.0,PERSONAL,16.02,0.49,3.0,561,No,1
1,21.0,female,High School,12282.0,0,OWN,1000.0,EDUCATION,11.14,0.08,2.0,504,Yes,0
2,25.0,female,High School,12438.0,3,MORTGAGE,5500.0,MEDICAL,12.87,0.44,3.0,635,No,1
3,23.0,female,Bachelor,79753.0,0,RENT,35000.0,MEDICAL,15.23,0.44,2.0,675,No,1
4,24.0,male,Master,66135.0,1,RENT,35000.0,MEDICAL,14.27,0.53,4.0,586,No,1


In [5]:
loan_data.columns.value_counts()

Unnamed: 0,count
person_age,1
person_gender,1
person_education,1
person_income,1
person_emp_exp,1
person_home_ownership,1
loan_amnt,1
loan_intent,1
loan_int_rate,1
loan_percent_income,1


In [37]:
# convert categorical data into Numerical data before scaling
loan_data.replace({
                    'person_gender':{'female':0, 'male':1},
                    'person_education':{'High School':0, 'Bachelor':1,'Associate':2,'Master':3, 'Doctorate':4},
                    'person_home_ownership':{'RENT':0, 'MORTGAGE': 1, 'OWN':2, 'OTHER':3},
                    'loan_intent':{'No':0, 'Yes':1},
                    'previous_loan_defaults_on_file': {'No':0, 'Yes':1},
                    'loan_intent': {'EDUCATION':0,'MEDICAL':1, 'VENTURE':2, 'PERSONAL':3, 'DEBTCONSOLIDATION': 4,
                                        'HOMEIMPROVEMENT': 5 }},inplace=True)


In [38]:
loan_data.head()

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,0,3,71948.0,0,0,35000.0,3,16.02,0.49,3.0,561,0,1
1,21.0,0,0,12282.0,0,2,1000.0,0,11.14,0.08,2.0,504,1,0
2,25.0,0,0,12438.0,3,1,5500.0,1,12.87,0.44,3.0,635,0,1
3,23.0,0,1,79753.0,0,0,35000.0,1,15.23,0.44,2.0,675,0,1
4,24.0,1,3,66135.0,1,0,35000.0,1,14.27,0.53,4.0,586,0,1


In [30]:
# Define features (X) and target (y)
target_column = 'loan_amnt'   # Target variable: loan_amnt or loan_int_rate
feature_column = ['person_age','person_gender','person_education','person_income','person_emp_exp',
'person_home_ownership'	,'loan_intent','loan_int_rate','loan_percent_income','cb_person_cred_hist_length',
'credit_score',	'previous_loan_defaults_on_file']


X = loan_data[feature_column] # feature columns
y = loan_data[target_column]  # target_column
print(X.shape, y.shape)

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
print(X_scaled)


(45000, 12) (45000,)
[[-0.95353824 -1.11006918  1.53365241 ... -0.73910854 -1.41981408
  -1.01603973]
 [-1.11896309 -1.11006918 -1.25965955 ... -0.99686317 -2.5499748
   0.98421348]
 [-0.45726369 -1.11006918 -1.25965955 ... -0.73910854  0.04741211
  -1.01603973]
 ...
 [ 0.8661351   0.90084476  0.60254843 ...  1.06517387  0.70171569
  -1.01603973]
 [ 0.2044357   0.90084476 -0.32855556 ...  0.03415535 -0.5672367
  -1.01603973]
 [-0.62268854  0.90084476 -1.25965955 ... -0.73910854 -0.09137955
  -1.01603973]]


In [46]:
print(X_train.shape, X_test.shape )
print(y_train.shape, y_test.shape)

(36000, 12) (9000, 12)
(36000,) (9000,)


In [22]:
# Step 3: Build the Neural Network model
model = Sequential()

# Input layer + hidden layers
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))  # Input layer
model.add(Dense(64, activation='relu'))  # Hidden layer 1
model.add(Dense(32, activation='relu'))  # Hidden layer 2

# Output layer
model.add(Dense(1))  # Linear activation for regression tasks

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])  # Loss: Mean Squared Error


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [23]:
# Step 4: Train the model
history = model.fit(X_train, y_train, validation_split=0.2, epochs=50, batch_size=32, verbose=1)

Epoch 1/50
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 83366296.0000 - mae: 6784.9639 - val_loss: 25250352.0000 - val_mae: 3230.3977
Epoch 2/50
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 23088252.0000 - mae: 3200.2510 - val_loss: 22609982.0000 - val_mae: 3110.5496
Epoch 3/50
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 18816426.0000 - mae: 3022.2917 - val_loss: 20443370.0000 - val_mae: 2992.5691
Epoch 4/50
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 19725766.0000 - mae: 2917.0676 - val_loss: 18218068.0000 - val_mae: 2630.3638
Epoch 5/50
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 14970485.0000 - mae: 2499.7749 - val_loss: 14316665.0000 - val_mae: 2191.4998
Epoch 6/50
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 10539435.0000 - mae: 2028.9204 - val_loss: 107

In [24]:
# Step 5: Evaluate the model
y_pred = model.predict(X_test)

# Calculate performance metrics
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("Neural Network Performance:")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R²): {r2}")


[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Neural Network Performance:
Mean Absolute Error (MAE): 322.2998709988064
Root Mean Squared Error (RMSE): 476.36198436722873
R-squared (R²): 0.9943623633528262


In [25]:
# Evaluate on Training Data
train_loss, train_mae = model.evaluate(X_train, y_train, verbose=0)
print(f"Training Loss (MSE): {train_loss}")
print(f"Training MAE: {train_mae}")

# Evaluate on Testing Data
test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)
print(f"Testing Loss (MSE): {test_loss}")
print(f"Testing MAE: {test_mae}")


Training Loss (MSE): 213223.46875
Training MAE: 316.32421875
Testing Loss (MSE): 226920.75
Testing MAE: 322.29986572265625


In [26]:
print(feature_column)


['person_age', 'person_gender', 'person_education', 'person_income', 'person_emp_exp', 'person_home_ownership', 'loan_intent', 'loan_int_rate', 'loan_percent_income', 'cb_person_cred_hist_length', 'credit_score', 'previous_loan_defaults_on_file']


In [27]:
# Example input features
input_data = {
    'person_age': 35,
    'person_gender': 1,  # 1 for Male
    'person_education': 1,  # 1 for Bachelor
    'person_income': 50000,
    'person_emp_exp': 10,
    'person_home_ownership': 0,  # 0 for RENT
    'loan_intent': 3,  # 3 for PERSONAL
    'loan_int_rate': 12.5,
    'loan_percent_income': 0.2,
    'cb_person_cred_hist_length': 4,
    'credit_score': 750,
    'previous_loan_defaults_on_file': 0,  # 0 for No
}


In [36]:
import numpy as np

# Convert input data to a format compatible with the model
input_features = np.array([[35, 1, 1, 50000, 10, 0, 3, 12.5, 0.2, 4, 750, 0]])

# Standardize the input features (using the same scaler used during training)
input_features_scaled = scaler.transform(input_features)

# Predict loan amount using the trained neural network model
predicted_loan_amount = model.predict(input_features_scaled)

print(f"Predicted Loan Amount: {predicted_loan_amount[0][0]:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Predicted Loan Amount: 9702.79




In [39]:
()

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,0,3,71948.0,0,0,35000.0,3,16.02,0.49,3.0,561,0,1
1,21.0,0,0,12282.0,0,2,1000.0,0,11.14,0.08,2.0,504,1,0
2,25.0,0,0,12438.0,3,1,5500.0,1,12.87,0.44,3.0,635,0,1
3,23.0,0,1,79753.0,0,0,35000.0,1,15.23,0.44,2.0,675,0,1
4,24.0,1,3,66135.0,1,0,35000.0,1,14.27,0.53,4.0,586,0,1


In [40]:
feature_column

['person_age',
 'person_gender',
 'person_education',
 'person_income',
 'person_emp_exp',
 'person_home_ownership',
 'loan_intent',
 'loan_int_rate',
 'loan_percent_income',
 'cb_person_cred_hist_length',
 'credit_score',
 'previous_loan_defaults_on_file']

In [42]:
import numpy as np

# Convert input data to a format compatible with the model
input_features = np.array([[22, 0, 3, 71948.0, 0, 0, 3, 16.02, 0.49, 3.0, 561, 0]])

# Standardize the input features (using the same scaler used during training)
input_features_scaled = scaler.transform(input_features)

# Predict loan amount using the trained neural network model
predicted_loan_amount = model.predict(input_features_scaled)

print(f"Predicted Loan Amount: {predicted_loan_amount[0][0]:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
Predicted Loan Amount: 31868.57




In [43]:
# Compare predicted vs actual


results = pd.DataFrame({
    'Actual': y_test.values.flatten(), # Use .values to access the NumPy array
    'Predicted': y_pred.flatten()
})

print(results.head(5))
print(results.tail(5))


    Actual     Predicted
0   7500.0   7702.019043
1   9000.0   8880.768555
2   5000.0   4281.913574
3   5000.0   5261.187012
4  10000.0  10113.776367
       Actual     Predicted
8995  14000.0  14453.375977
8996   6725.0   6554.520508
8997  14000.0  13727.569336
8998   8000.0   7937.621582
8999   8500.0   8040.936523
