In [1]:
import psycopg2
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from shap import Explainer
import pickle
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, confusion_matrix


In [2]:
server = "localhost"
database = "db_02"
username = "postgres"
password = "ABcd@12#$"
source = "creditcard_train"

data_train=[]

try:
  conn = psycopg2.connect(dbname=database, user=username, password=password, host=server, port="5432", sslmode='disable')
except psycopg2.Error as e:
  print("Error connecting to PostgreSQL database:", e)
else:
  print("Connection established successfully!")

cursor = conn.cursor()

query =f"SELECT * FROM {source}"
cursor.execute(query)

for i in cursor.fetchall():
  data_train.append(i[1:])

column_names = ['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount', 'Class']
data_train=pd.DataFrame(data_train, dtype=float, columns=column_names)
# print(data_train)

Connection established successfully!


In [3]:
# Data Pre-Processing
data_train = data_train.fillna(...)

categorical_features = []

if len(categorical_features) > 0:
  encoder = LabelEncoder()
  for feature in categorical_features:
    data_train[feature] = encoder.fit_transform(data_train[feature])

scaler = StandardScaler()

features_to_scale = data_train.drop("Class", axis=1).columns
data_train_scaled = scaler.fit_transform(data_train[features_to_scale])

X_train = data_train_scaled
y_train = data_train["Class"]

In [4]:
print(X_train.shape, y_train.shape)

(213605, 30) (213605,)


In [5]:
model = Sequential([
    Flatten(input_shape=(30,)),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [6]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 30)                0         
                                                                 
 dense (Dense)               (None, 128)               3968      
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 1)                 65        
                                                                 
Total params: 12,289
Trainable params: 12,289
Non-traina

In [8]:
# basic deep learning network
model.fit(X_train, y_train, epochs=10, verbose=0)
y_pred = model.predict(X_train, verbose=0).astype(int)

accuracy = accuracy_score(y_train, y_pred)
print("Accuracy:", accuracy)
confusion_mat = confusion_matrix(y_train, y_pred)
print("Confusion Matrix:\n", confusion_mat)
classification_rep = classification_report(y_train, y_pred)
print("Classification Report:\n", classification_rep)
roc_auc = roc_auc_score(y_train, y_pred)
print("AUC-ROC:", roc_auc)

Accuracy: 0.9982210154256689
Confusion Matrix:
 [[213207      0]
 [   380     18]]
Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00    213207
         1.0       1.00      0.05      0.09       398

    accuracy                           1.00    213605
   macro avg       1.00      0.52      0.54    213605
weighted avg       1.00      1.00      1.00    213605

AUC-ROC: 0.5226130653266332


In [None]:
explainer = Explainer(model, X_train)
shap_values = explainer(X_train)

with open("model_dl.pkl", "wb") as f:
    pickle.dump(model, f)

with open("explainer_dl.pkl", "wb") as f:
    pickle.dump(explainer, f)

PermutationExplainer explainer:  80%|███████▉  | 170657/213605 [13:52:06<2:22:01,  5.04it/s] 