In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM, concatenate

In [2]:
# Load the credit dataset from a CSV file located relative to the working
# directory, then preview the first rows as the cell's displayed output.
credit_data = pd.read_csv('Credit_Dataset.csv')
credit_data.head()

Unnamed: 0,BAD,LOAN,MORTDUE,VALUE,REASON,JOB,YOJ,DEROG,DELINQ,CLAGE,NINQ,CLNO,DEBTINC
0,1,1100,25860.0,39025.0,HomeImp,Other,10.5,0.0,0.0,94.366667,1.0,9.0,
1,1,1300,70053.0,68400.0,HomeImp,Other,7.0,0.0,2.0,121.833333,0.0,14.0,
2,1,1500,13500.0,16700.0,HomeImp,Other,4.0,0.0,0.0,149.466667,1.0,10.0,
3,1,1500,,,,,,,,,,,
4,0,1700,97800.0,112000.0,HomeImp,Office,3.0,0.0,0.0,93.333333,0.0,14.0,


In [3]:
# Print the per-column dtype and non-null count summary for the raw frame.
credit_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5960 entries, 0 to 5959
Data columns (total 13 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   BAD      5960 non-null   int64  
 1   LOAN     5960 non-null   int64  
 2   MORTDUE  5442 non-null   float64
 3   VALUE    5848 non-null   float64
 4   REASON   5708 non-null   object 
 5   JOB      5681 non-null   object 
 6   YOJ      5445 non-null   float64
 7   DEROG    5252 non-null   float64
 8   DELINQ   5380 non-null   float64
 9   CLAGE    5652 non-null   float64
 10  NINQ     5450 non-null   float64
 11  CLNO     5738 non-null   float64
 12  DEBTINC  4693 non-null   float64
dtypes: float64(9), int64(2), object(2)
memory usage: 605.4+ KB


In [4]:
# Count missing values per column (displayed as the cell's output).
credit_data.isna().sum()

BAD           0
LOAN          0
MORTDUE     518
VALUE       112
REASON      252
JOB         279
YOJ         515
DEROG       708
DELINQ      580
CLAGE       308
NINQ        510
CLNO        222
DEBTINC    1267
dtype: int64

In [5]:
# Data-quality summary: fully duplicated rows and per-column missing counts.
# Both statistics are computed up front, then reported.
duplicate_count = credit_data.duplicated().sum()
null_values_count = credit_data.isnull().sum()

print("Total number of duplicate rows:", duplicate_count)
print("Total number of null values in each column:", null_values_count, sep="\n")

Total number of duplicate rows: 0
Total number of null values in each column:
BAD           0
LOAN          0
MORTDUE     518
VALUE       112
REASON      252
JOB         279
YOJ         515
DEROG       708
DELINQ      580
CLAGE       308
NINQ        510
CLNO        222
DEBTINC    1267
dtype: int64


In [6]:
# Remove the two object-dtype columns (REASON, JOB) in a single call.
# errors="ignore" keeps this cell re-runnable: without it, executing the cell
# a second time raises KeyError because the columns are already gone.
credit_data = credit_data.drop(columns=["REASON", "JOB"], errors="ignore")

# Fill every remaining gap with the mean of its column.
# NOTE(review): these means are computed over the whole dataset, i.e. before
# the train/test split performed later in this notebook, so statistics from
# the test rows leak into the training features. Consider imputing after the
# split using training-set means only.
credit_data = credit_data.fillna(credit_data.mean(numeric_only=True))

# Displayed output: confirm that no missing values remain.
credit_data.isna().sum()

BAD        0
LOAN       0
MORTDUE    0
VALUE      0
YOJ        0
DEROG      0
DELINQ     0
CLAGE      0
NINQ       0
CLNO       0
DEBTINC    0
dtype: int64

In [8]:
# Show (rows, columns) of the frame at this point in the notebook.
credit_data.shape

(5960, 11)

In [6]:
# Relabel the outcome column, then separate the label from the features.
credit_data.rename(columns={"BAD": "TARGET"}, inplace=True)
Y = credit_data["TARGET"]
X = credit_data.drop(columns=["TARGET"])

# Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    stratify=Y,
    random_state=42,
)

# Standardise features; the scaler learns its statistics from the training
# split only and is then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
# Baseline scikit-learn classifiers.
# The tree-based estimators are randomised (feature/sample selection), so they
# are seeded with the same value used for the train/test split; otherwise the
# reported metrics change on every run of the notebook.
lr_classifier = LogisticRegression()
dt_classifier = DecisionTreeClassifier(random_state=42)
rf_classifier = RandomForestClassifier(random_state=42)

In [9]:
# Logistic regression: train on the standardised training split, predict the
# hold-out split, and keep both the accuracy and the per-class report.
lr_predictions = lr_classifier.fit(X_train_scaled, Y_train).predict(X_test_scaled)
lr_report = classification_report(Y_test, lr_predictions)
lr_accuracy = accuracy_score(Y_test, lr_predictions)

In [10]:
# Decision tree: train on the standardised training split, predict the
# hold-out split, and keep both the accuracy and the per-class report.
dt_predictions = dt_classifier.fit(X_train_scaled, Y_train).predict(X_test_scaled)
dt_report = classification_report(Y_test, dt_predictions)
dt_accuracy = accuracy_score(Y_test, dt_predictions)

In [11]:
# Random forest: train on the standardised training split, predict the
# hold-out split, and keep both the accuracy and the per-class report.
rf_predictions = rf_classifier.fit(X_train_scaled, Y_train).predict(X_test_scaled)
rf_report = classification_report(Y_test, rf_predictions)
rf_accuracy = accuracy_score(Y_test, rf_predictions)

In [17]:
# Define MLP model
# Seed the Python, NumPy and TensorFlow RNGs first: weight initialisation and
# batch shuffling are random, so without a seed the metrics below differ on
# every run. Seeding inside this cell also makes it independent of the order
# in which the other model cells were executed.
tf.keras.utils.set_random_seed(42)

# Two hidden layers of 64 ReLU units and a single sigmoid output unit.
mlp_model = Sequential()
mlp_model.add(Dense(64, activation='relu', input_dim=X_train_scaled.shape[1]))
mlp_model.add(Dense(64, activation='relu'))
mlp_model.add(Dense(1, activation='sigmoid'))

# Compile and train the MLP model
mlp_model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])
mlp_model.fit(X_train_scaled, Y_train, epochs=10, batch_size=32, verbose=0)

# Evaluate MLP model
# The sigmoid outputs are turned into 0/1 labels with a 0.5 cut-off.
mlp_probabilities = mlp_model.predict(X_test_scaled)
mlp_predictions = (mlp_probabilities > 0.5).astype(int)
mlp_accuracy = accuracy_score(Y_test, mlp_predictions)
mlp_report = classification_report(Y_test, mlp_predictions)



In [18]:
# Append a trailing axis of length 1 so each row becomes a
# (n_features, 1) sequence, the 3-D layout passed to the Conv1D / LSTM models.
X_train_reshaped = np.expand_dims(X_train_scaled, axis=-1)
X_test_reshaped = np.expand_dims(X_test_scaled, axis=-1)

In [22]:
# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation and
# batch shuffling, and therefore the trained model, are reproducible
# regardless of which other cells ran before this one.
tf.keras.utils.set_random_seed(42)

# 1-D CNN over the feature axis: one convolution (32 filters, width 3),
# max-pooling by 2, then a 64-unit dense layer and a sigmoid output unit.
cnn_model = Sequential()
cnn_model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train_reshaped.shape[1], 1)))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Flatten())
cnn_model.add(Dense(64, activation='relu'))
cnn_model.add(Dense(1, activation='sigmoid'))

# Compile and train the CNN model; fit() is the last expression, so its
# History object is what the cell displays.
cnn_model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])
cnn_model.fit(X_train_reshaped, Y_train, epochs=10, batch_size=32, verbose=0)

<keras.callbacks.History at 0x27812a36280>

In [24]:
# Evaluate CNN model: threshold the sigmoid outputs at 0.5 to obtain 0/1
# labels, then score them against the hold-out labels.
cnn_probabilities = cnn_model.predict(X_test_reshaped)
cnn_predictions = np.greater(cnn_probabilities, 0.5).astype(int)
cnn_report = classification_report(Y_test, cnn_predictions)
cnn_accuracy = accuracy_score(Y_test, cnn_predictions)



In [25]:
# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation and
# batch shuffling, and therefore the reported metrics, are reproducible
# regardless of which other cells ran before this one.
tf.keras.utils.set_random_seed(42)

# Single 64-unit LSTM reading the features as a length-n_features sequence
# with one value per step, followed by a sigmoid output unit.
rnn_model = Sequential()
rnn_model.add(LSTM(64, input_shape=(X_train_reshaped.shape[1], 1)))
rnn_model.add(Dense(1, activation='sigmoid'))

# Compile and train the RNN model
rnn_model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])
rnn_model.fit(X_train_reshaped, Y_train, epochs=10, batch_size=32, verbose=0)

# Evaluate RNN model
# The sigmoid outputs are turned into 0/1 labels with a 0.5 cut-off.
rnn_probabilities = rnn_model.predict(X_test_reshaped)
rnn_predictions = (rnn_probabilities > 0.5).astype(int)
rnn_accuracy = accuracy_score(Y_test, rnn_predictions)
rnn_report = classification_report(Y_test, rnn_predictions)



In [26]:
# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation and
# batch shuffling, and therefore the reported metrics, are reproducible
# regardless of which other cells ran before this one.
tf.keras.utils.set_random_seed(42)

# Each sample is a (n_features, 1) sequence.
input_shape = (X_train_reshaped.shape[1], 1)

# Hybrid model: a Conv1D + max-pooling front end whose output sequence is fed
# to a 64-unit LSTM, followed by a sigmoid output unit.
hybrid_model = Sequential()
hybrid_model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape))
hybrid_model.add(MaxPooling1D(pool_size=2))
hybrid_model.add(LSTM(64))
hybrid_model.add(Dense(1, activation='sigmoid'))

# Compile and train the Hybrid model
hybrid_model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])
hybrid_model.fit(X_train_reshaped, Y_train, epochs=10, batch_size=32, verbose=0)

# Evaluate Hybrid model
# The sigmoid outputs are turned into 0/1 labels with a 0.5 cut-off.
hybrid_probabilities = hybrid_model.predict(X_test_reshaped)
hybrid_predictions = (hybrid_probabilities > 0.5).astype(int)
hybrid_accuracy = accuracy_score(Y_test, hybrid_predictions)
hybrid_report = classification_report(Y_test, hybrid_predictions)



In [27]:
# Compare results
# Each entry pairs the exact label text with the value printed after it; the
# leading '\n' on some labels produces the blank separator lines in the output.
traditional_results = (
    ('Logistic Regression Accuracy:', lr_accuracy, 'Logistic Regression Report:\n', lr_report),
    ('\nDecision Tree Accuracy:', dt_accuracy, 'Decision Tree Report:\n', dt_report),
    ('\nRandom Forest Accuracy:', rf_accuracy, 'Random Forest Report:\n', rf_report),
)
deep_learning_results = (
    ('MLP Accuracy:', mlp_accuracy, 'MLP Report:\n', mlp_report),
    ('\nCNN Accuracy:', cnn_accuracy, 'CNN Report:\n', cnn_report),
    ('\nRNN Accuracy:', rnn_accuracy, 'RNN Report:\n', rnn_report),
    ('\nHybrid (CNN + RNN) Accuracy:', hybrid_accuracy, 'Hybrid (CNN + RNN) Report:\n', hybrid_report),
)

for section_heading, section_rows in (
    ('Traditional ML Classifiers:', traditional_results),
    ('\nDeep Learning Classifiers:', deep_learning_results),
):
    print(section_heading)
    for accuracy_label, accuracy_value, report_label, report_text in section_rows:
        print(accuracy_label, accuracy_value)
        print(report_label, report_text)

Traditional ML Classifiers:
Logistic Regression Accuracy: 0.8355704697986577
Logistic Regression Report:
               precision    recall  f1-score   support

           0       0.85      0.97      0.90       954
           1       0.70      0.31      0.43       238

    accuracy                           0.84      1192
   macro avg       0.77      0.64      0.67      1192
weighted avg       0.82      0.84      0.81      1192


Decision Tree Accuracy: 0.8699664429530202
Decision Tree Report:
               precision    recall  f1-score   support

           0       0.90      0.94      0.92       954
           1       0.70      0.60      0.65       238

    accuracy                           0.87      1192
   macro avg       0.80      0.77      0.78      1192
weighted avg       0.86      0.87      0.87      1192


Random Forest Accuracy: 0.9026845637583892
Random Forest Report:
               precision    recall  f1-score   support

           0       0.92      0.97      0.94       9