In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from keras_tuner.tuners import BayesianOptimization
import datetime
# Load data
data = pd.read_csv('data.csv')


In [2]:
# view the columns
print(data.columns)

Index(['ClmAdmitDiagnosisCode', 'ClmDiagnosisCode_1', 'ClmDiagnosisCode_2',
       'ClmDiagnosisCode_9', 'ClmDiagnosisCode_10', 'Gender', 'Race',
       'RenalDiseaseIndicator', 'State', 'County', 'ChronicCond_Alzheimer',
       'ChronicCond_Heartfailure', 'ChronicCond_KidneyDisease',
       'ChronicCond_Cancer', 'ChronicCond_ObstrPulmonary',
       'ChronicCond_Depression', 'ChronicCond_Diabetes',
       'ChronicCond_IschemicHeart', 'ChronicCond_Osteoporasis',
       'ChronicCond_rheumatoidarthritis', 'ChronicCond_stroke',
       'PotentialFraud', 'Age', 'WeekendAdmission', 'IsDead',
       'ClaimSettlementDelay_Cat', 'TreatmentDuration_Cat',
       'Log_TotalClaimAmount', 'Log_IPTotalAmount', 'Log_OPTotalAmount',
       'UniquePhysCount', 'IsSamePhysMultiRole1', 'IsSamePhysMultiRole2',
       'PHY412132', 'PHY337425', 'PHY330576'],
      dtype='object')


In [2]:
# Convert target to binary and split data

train_data, test_data = train_test_split(data, test_size=0.2, random_state=123)

# Separate features and target variable
X_train = train_data.drop('PotentialFraud', axis=1)
y_train = train_data['PotentialFraud']
X_test = test_data.drop('PotentialFraud', axis=1)
y_test = test_data['PotentialFraud']


In [4]:
print(y_train.unique())


[1 0]


Define the Hypermodel

In [5]:
import numpy as np
from keras_tuner import HyperModel
# Class Weights Calculation for Imbalanced Data
class_weights = class_weight.compute_class_weight('balanced',
                                                  classes=np.unique(y_train),
                                                  y=y_train)
class_weights = dict(enumerate(class_weights))

class MyHyperModel(HyperModel):
    def __init__(self, input_shape):
        self.input_shape = input_shape

    def build(self, hp):
        model = Sequential()
        model.add(Dense(units=hp.Int('units', min_value=32, max_value=512, step=32),
                        input_shape=self.input_shape))
        model.add(LeakyReLU(alpha=0.01))
        model.add(Dropout(rate=hp.Float('dropout', min_value=0.0, max_value=0.5, step=0.1)))
        model.add(BatchNormalization())
        for i in range(hp.Int('num_layers', 1, 3)):
            model.add(Dense(units=hp.Int(f'units_{i}', min_value=32, max_value=512, step=32)))
            model.add(LeakyReLU(alpha=0.01))
            model.add(Dropout(rate=hp.Float(f'dropout_{i}', min_value=0.0, max_value=0.5, step=0.1)))
            model.add(BatchNormalization())
        model.add(Dense(1, activation='sigmoid'))
        model.compile(optimizer=keras.optimizers.Adam(hp.Float('learning_rate', 1e-4, 1e-2, sampling='log')),
                      loss='binary_crossentropy',
                      metrics=['accuracy', keras.metrics.Precision(), keras.metrics.Recall(), keras.metrics.AUC()])
        return model

In [9]:

# Instantiate the hypermodel
hypermodel = MyHyperModel(input_shape=(X_train.shape[1],))

# Bayesian Optimization
tuner = BayesianOptimization(
    hypermodel,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=2,
    directory='my_dir',
    project_name='keras_tuner_bayesian'
)

# Search for the best hyperparameters
tuner.search(X_train, y_train, epochs=10, validation_split=0.2, class_weight=class_weights)

# Retrieve the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

Trial 10 Complete [00h 27m 39s]
val_accuracy: 0.6461137235164642

Best val_accuracy So Far: 0.6461137235164642
Total elapsed time: 04h 21m 50s


In [10]:

# Build the model with the best hyperparameters
best_model = hypermodel.build(best_hps)

# Experiment Tracking with TensorBoard
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Training with early stopping and class weights
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = best_model.fit(X_train, y_train, epochs=50, validation_split=0.2, class_weight=class_weights, callbacks=[early_stopping, tensorboard_callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50


In [13]:
# Save the best model
best_model.save('best_nn_model.h5')


In [3]:
# Load the saved model
saved_best_model = keras.models.load_model('best_nn_model.h5')

# Generate predictions with the loaded model
loaded_nn_predictions = saved_best_model.predict(X_test)




In [18]:
loaded_nn_predictions.shape

(111643, 1)

In [7]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Assuming your neural network outputs probabilities and you have a binary classification task
threshold = 0.5
binary_predictions = (loaded_nn_predictions > threshold).astype(int)

# Evaluate the model
accuracy = accuracy_score(y_test, binary_predictions)
precision = precision_score(y_test, binary_predictions)
recall = recall_score(y_test, binary_predictions)
f1 = f1_score(y_test, binary_predictions)

# You can also get the confusion matrix
conf_matrix = confusion_matrix(y_test, binary_predictions)

# Print the metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"Confusion Matrix:\n{conf_matrix}")



Accuracy: 0.6417
Precision: 0.5368
Recall: 0.4073
F1-Score: 0.4632
Confusion Matrix:
[[54388 14891]
 [25109 17255]]


Accuracy: 0.6417\
Precision: 0.5368\
Recall: 0.4073\
F1-Score: 0.4632\
Confusion Matrix:\
[[54388 14891]\
 [25109 17255]]

```
Epoch 1/50
11165/11165 [==============================] - 85s 7ms/step - loss: 0.6827 - accuracy: 0.5718 - precision_1: 0.4454 - recall_1: 0.4916 - auc_1: 0.5823 - val_loss: 0.6628 - val_accuracy: 0.6045 - val_precision_1: 0.4770 - val_recall_1: 0.4297 - val_auc_1: 0.6040
Epoch 2/50
11165/11165 [==============================] - 79s 7ms/step - loss: 0.6736 - accuracy: 0.5846 - precision_1: 0.4597 - recall_1: 0.4962 - auc_1: 0.5974 - val_loss: 0.6644 - val_accuracy: 0.6273 - val_precision_1: 0.5130 - val_recall_1: 0.3687 - val_auc_1: 0.6157
Epoch 3/50
11165/11165 [==============================] - 82s 7ms/step - loss: 0.6716 - accuracy: 0.5874 - precision_1: 0.4639 - recall_1: 0.5130 - auc_1: 0.6046 - val_loss: 0.6840 - val_accuracy: 0.5473 - val_precision_1: 0.4396 - val_recall_1: 0.6989 - val_auc_1: 0.6218
Epoch 4/50
11165/11165 [==============================] - 82s 7ms/step - loss: 0.6669 - accuracy: 0.5971 - precision_1: 0.4758 - recall_1: 0.5350 - auc_1: 0.6198 - val_loss: 0.6516 - val_accuracy: 0.6265 - val_precision_1: 0.5085 - val_recall_1: 0.4996 - val_auc_1: 0.6400
Epoch 5/50
11165/11165 [==============================] - 81s 7ms/step - loss: 0.6647 - accuracy: 0.6015 - precision_1: 0.4810 - recall_1: 0.5422 - auc_1: 0.6255 - val_loss: 0.6608 - val_accuracy: 0.6096 - val_precision_1: 0.4879 - val_recall_1: 0.5621 - val_auc_1: 0.6440
Epoch 6/50
11165/11165 [==============================] - 81s 7ms/step - loss: 0.6638 - accuracy: 0.6036 - precision_1: 0.4834 - recall_1: 0.5436 - auc_1: 0.6282 - val_loss: 0.6534 - val_accuracy: 0.6123 - val_precision_1: 0.4913 - val_recall_1: 0.5840 - val_auc_1: 0.6483
Epoch 7/50
11165/11165 [==============================] - 80s 7ms/step - loss: 0.6629 - accuracy: 0.6033 - precision_1: 0.4832 - recall_1: 0.5505 - auc_1: 0.6303 - val_loss: 0.6499 - val_accuracy: 0.6505 - val_precision_1: 0.5571 - val_recall_1: 0.3888 - val_auc_1: 0.6484
Epoch 8/50
11165/11165 [==============================] - 83s 7ms/step - loss: 0.6621 - accuracy: 0.6052 - precision_1: 0.4854 - recall_1: 0.5491 - auc_1: 0.6322 - val_loss: 0.6401 - val_accuracy: 0.6449 - val_precision_1: 0.5432 - val_recall_1: 0.4096 - val_auc_1: 0.6447
Epoch 9/50
11165/11165 [==============================] - 81s 7ms/step - loss: 0.6614 - accuracy: 0.6073 - precision_1: 0.4877 - recall_1: 0.5468 - auc_1: 0.6340 - val_loss: 0.6598 - val_accuracy: 0.5901 - val_precision_1: 0.4711 - val_recall_1: 0.6467 - val_auc_1: 0.6486
Epoch 10/50
11165/11165 [==============================] - 83s 7ms/step - loss: 0.6610 - accuracy: 0.6067 - precision_1: 0.4870 - recall_1: 0.5498 - auc_1: 0.6345 - val_loss: 0.6436 - val_accuracy: 0.6503 - val_precision_1: 0.5497 - val_recall_1: 0.4383 - val_auc_1: 0.6603
Epoch 11/50
11165/11165 [==============================] - 80s 7ms/step - loss: 0.6601 - accuracy: 0.6083 - precision_1: 0.4890 - recall_1: 0.5564 - auc_1: 0.6371 - val_loss: 0.6600 - val_accuracy: 0.6069 - val_precision_1: 0.4864 - val_recall_1: 0.6273 - val_auc_1: 0.6602
```

  from kerastuner import HyperModel


Initialize the Tuner and Start the Search

Trial 10 Complete [00h 14m 38s]
val_accuracy: 0.6420661807060242

Best val_accuracy So Far: 0.6638377010822296
Total elapsed time: 01h 52m 42s
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50


Trial 10 Complete [00h 14m 38s]

val_accuracy: 0.6420661807060242

Best val_accuracy So Far: 0.6638377010822296

Total elapsed time: 01h 52m 42s