In [1]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras.layers import Dense


In [10]:
# Columns requested from the workbook; passed to read_excel via `usecols`.
columns_to_use = [
    'loan_amnt',
    'funded_amnt',
    'term',
    'int_rate',
    'installment',
    'emp_length',
    'home_ownership',
    'annual_inc',
    'verification_status',
    'loan_status',
    'purpose',
    'repay_fail',
]

# Load the dataset into a DataFrame, restricted to the columns listed above.
loan = pd.read_excel(io='ballanced_dataset.xlsx', usecols=columns_to_use)

In [11]:
# Column names grouped by kind: numeric columns first, categorical columns second.
selected_numeric_columns = [
    'loan_amnt', 'funded_amnt', 'int_rate',
    'installment', 'emp_length', 'annual_inc',
]
selected_categorical_columns = [
    'term', 'home_ownership',
    'verification_status', 'purpose',
]

In [12]:
# Superseded draft, kept for reference only: the next cell builds X and y from the same columns.
# X = loan[selected_numeric_columns + selected_categorical_columns]
# y = loan['repay_fail']

In [14]:
# Preprocess Data
# Build the feature matrix from the column lists defined in the selection cell
# rather than repeating the ten names here, so the two definitions cannot drift
# apart. The column set is unchanged; the numeric columns now come first.
X = loan[selected_numeric_columns + selected_categorical_columns]  # Selected features
y = loan['repay_fail']  # Target variable

# Split Data
# Hold out 20% of the rows for evaluation; random_state pins the shuffle so the
# same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Preprocess numerical features
numeric_features = ['loan_amnt', 'funded_amnt', 'int_rate', 'installment', 'emp_length', 'annual_inc']

# The frames returned by train_test_split are derived from X; take explicit
# copies before assigning columns so pandas does not raise
# SettingWithCopyWarning on the in-place writes below.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training rows only, then apply the same statistics to
# the test rows so no information from the test split reaches the scaler.
scaler = StandardScaler()
X_train[numeric_features] = scaler.fit_transform(X_train[numeric_features])
X_test[numeric_features] = scaler.transform(X_test[numeric_features])

# One-hot encode categorical features
categorical_features = ['term', 'home_ownership', 'verification_status', 'purpose']

# dtype=float keeps every column numeric of the same kind; bool dummies mixed
# with float columns would otherwise convert to an object array.
X_train_encoded = pd.get_dummies(
    X_train, columns=categorical_features, drop_first=True, dtype=float
)

# The training columns define the feature layout. Encode the test rows with
# every level present (no drop_first), then reindex to the training columns:
#   - the baseline level dropped in training is dropped here too,
#   - levels missing from the test split become all-zero columns,
#   - levels unseen in training are discarded (the row falls back to baseline).
# Encoding each split independently with drop_first=True could drop a
# different baseline level or yield a different number of columns.
X_test_encoded = pd.get_dummies(
    X_test, columns=categorical_features, dtype=float
).reindex(columns=X_train_encoded.columns, fill_value=0.0)

# Build the network layer by layer: a 64-unit ReLU hidden layer followed by a
# single sigmoid unit that outputs the probability of class 1.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=X_train_encoded.shape[1:]))
model.add(Dense(1, activation='sigmoid'))

# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the encoded training split for 10 epochs in mini-batches of 32.
model.fit(
    x=X_train_encoded,
    y=y_train,
    batch_size=32,
    epochs=10,
    verbose=1,
)

# Score the held-out rows, then label every probability above 0.5 as class 1.
y_pred_prob = model.predict(X_test_encoded)
y_pred = np.greater(y_pred_prob, 0.5).astype(int)

# Summarise per-class precision / recall / F1 against the true test labels.
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)
print(classification_rep)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
              precision    recall  f1-score   support

           0       0.63      0.65      0.64      1150
           1       0.65      0.63      0.64      1172

    accuracy                           0.64      2322
   macro avg       0.64      0.64      0.64      2322
weighted avg       0.64      0.64      0.64      2322



In [15]:
y_pred

array([[1],
       [0],
       [0],
       ...,
       [1],
       [1],
       [1]])

In [16]:
y_test

10154    1
7181     1
7550     1
7990     1
1299     0
        ..
8260     1
7790     1
3345     0
7803     1
693      0
Name: repay_fail, Length: 2322, dtype: int64