In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
dataset  = pd.read_csv('Loan_default.csv')

In [5]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer.fit(dataset.iloc[:, 1:9])
dataset.iloc[:, 1:9] = imputer.transform(dataset.iloc[:, 1:9])

In [6]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

categorical_cols = ['Education', 'EmploymentType', 'MaritalStatus', 'LoanPurpose']

# Create ColumnTransformer
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), categorical_cols)],
    remainder='passthrough'  # Keep all other columns as-is
)

from sklearn.preprocessing import LabelEncoder

# Define the columns to encode
binary_cols = ['HasMortgage', 'HasDependents', 'HasCoSigner']

# Apply label encoding
for col in binary_cols:
    le = LabelEncoder()
    dataset[col] = le.fit_transform(dataset[col])

# Split features and target
X = dataset.drop(columns=['LoanID','InterestRate'])  # Features

dataset['InterestRate'].fillna(dataset['InterestRate'].mean(), inplace=True)
y = dataset['InterestRate'].values          # Target

# Apply encoding to X
print(X.columns)
X = ct.fit_transform(X)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataset['InterestRate'].fillna(dataset['InterestRate'].mean(), inplace=True)


Index(['Age', 'Income', 'LoanAmount', 'CreditScore', 'MonthsEmployed',
       'NumCreditLines', 'LoanTerm', 'DTIRatio', 'Education', 'EmploymentType',
       'MaritalStatus', 'HasMortgage', 'HasDependents', 'LoanPurpose',
       'HasCoSigner', 'Default'],
      dtype='object')


In [7]:
print(X)

[[1. 0. 0. ... 1. 1. 0.]
 [0. 0. 1. ... 0. 1. 0.]
 [0. 0. 1. ... 1. 0. 1.]
 ...
 [0. 1. 0. ... 0. 1. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 1. 1. 0.]]


In [8]:
print(y)

[15.23  4.81 21.17 ... 21.11 22.01 12.77]


In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 1)

In [10]:
print(X_train)

[[0. 0. 0. ... 1. 0. 0.]
 [0. 1. 0. ... 1. 0. 0.]
 [1. 0. 0. ... 1. 1. 0.]
 ...
 [1. 0. 0. ... 1. 0. 0.]
 [1. 0. 0. ... 0. 1. 0.]
 [0. 0. 1. ... 1. 1. 0.]]


In [11]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [13]:
import tensorflow as tf

ann = tf.keras.models.Sequential()

In [15]:
ann.add(tf.keras.layers.Dense(units=10, activation='relu'))
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))
ann.add(tf.keras.layers.Dense(units=1, activation='linear'))

In [20]:
ann.compile(optimizer = 'adam', loss = 'mean_squared_error')

In [21]:
ann.fit(X_train, y_train, batch_size = 32, epochs = 10)

Epoch 1/10
[1m2752/2752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 43.2049
Epoch 2/10
[1m2752/2752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 43.5674
Epoch 3/10
[1m2752/2752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 43.3372
Epoch 4/10
[1m2752/2752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 43.4309
Epoch 5/10
[1m2752/2752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 43.5157
Epoch 6/10
[1m2752/2752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 43.2152
Epoch 7/10
[1m2752/2752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 43.5957
Epoch 8/10
[1m2752/2752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.0000e+0

<keras.src.callbacks.history.History at 0x7ee4292b4ed0>