In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, RMSprop

# Read the dataset from the notebook's working directory into a DataFrame
DATA_PATH = 'data.csv'
data = pd.read_csv(DATA_PATH)


In [16]:
# List every column label present in the loaded frame
column_index = data.columns
print(column_index)

Index(['ClmAdmitDiagnosisCode', 'ClmDiagnosisCode_1', 'ClmDiagnosisCode_2',
       'ClmDiagnosisCode_9', 'ClmDiagnosisCode_10', 'Gender', 'Race',
       'RenalDiseaseIndicator', 'State', 'County', 'ChronicCond_Alzheimer',
       'ChronicCond_Heartfailure', 'ChronicCond_KidneyDisease',
       'ChronicCond_Cancer', 'ChronicCond_ObstrPulmonary',
       'ChronicCond_Depression', 'ChronicCond_Diabetes',
       'ChronicCond_IschemicHeart', 'ChronicCond_Osteoporasis',
       'ChronicCond_rheumatoidarthritis', 'ChronicCond_stroke',
       'PotentialFraud', 'Age', 'WeekendAdmission', 'IsDead',
       'ClaimSettlementDelay_Cat', 'TreatmentDuration_Cat',
       'Log_TotalClaimAmount', 'Log_IPTotalAmount', 'Log_OPTotalAmount',
       'UniquePhysCount', 'IsSamePhysMultiRole1', 'IsSamePhysMultiRole2',
       'PHY412132', 'PHY337425', 'PHY330576'],
      dtype='object')


In [17]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
# The target column is used as-is (no label conversion happens in this cell).
TARGET_COLUMN = 'PotentialFraud'

train_data, test_data = train_test_split(data, random_state=123, test_size=0.2)

# Split each partition into its feature matrix and its label vector
X_train, y_train = train_data.drop(columns=TARGET_COLUMN), train_data[TARGET_COLUMN]
X_test, y_test = test_data.drop(columns=TARGET_COLUMN), test_data[TARGET_COLUMN]


In [18]:
# Confirm which distinct label values occur in the training target
target_classes = y_train.unique()
print(target_classes)


[1 0]


Define the Hypermodel

In [21]:
from kerastuner import HyperModel
from kerastuner.tuners import RandomSearch

class MyHyperModel(HyperModel):
    """Keras Tuner search space for a fully connected binary classifier.

    The network is: one tunable Dense input layer with dropout, followed by
    one to three tunable Dense + Dropout pairs, and a single sigmoid output
    unit. The Adam learning rate is tuned as well.
    """

    def __init__(self, input_shape):
        """Store the shape of a single input sample.

        Args:
            input_shape: Shape tuple passed to the first Dense layer as
                ``input_shape`` (the caller in this notebook passes
                ``(n_features,)``).
        """
        # Initialise the base HyperModel first so its own attributes exist
        # before this subclass adds state of its own.
        super().__init__()
        self.input_shape = input_shape

    def build(self, hp):
        """Build and compile one candidate model from the sampled hyperparameters.

        Args:
            hp: The ``HyperParameters`` object supplied by the tuner; every
                ``hp.*`` call below both declares and samples a value.

        Returns:
            A compiled ``Sequential`` model using binary cross-entropy loss
            and reporting accuracy.
        """
        model = Sequential()

        # Input block: layer width, activation and dropout rate are all tunable.
        model.add(Dense(
            units=hp.Int('units', min_value=32, max_value=512, step=32),
            activation=hp.Choice('activation', ['relu', 'tanh']),
            input_shape=self.input_shape))
        model.add(Dropout(rate=hp.Float('dropout', min_value=0.0, max_value=0.5, step=0.1)))

        # Between one and three additional hidden blocks. Each block gets its
        # own index-suffixed hyperparameters so they are tuned independently.
        for i in range(hp.Int('num_layers', 1, 3)):
            model.add(Dense(
                units=hp.Int(f'units_{i}', min_value=32, max_value=512, step=32),
                activation=hp.Choice(f'activation_{i}', ['relu', 'tanh'])))
            model.add(Dropout(rate=hp.Float(f'dropout_{i}', min_value=0.0, max_value=0.5, step=0.1)))

        # Single sigmoid unit: the model outputs one probability per sample.
        model.add(Dense(1, activation='sigmoid'))

        # Learning rate is sampled on a log scale between 1e-4 and 1e-2.
        model.compile(
            optimizer=keras.optimizers.Adam(
                hp.Float('learning_rate', 1e-4, 1e-2, sampling='log')),
            loss='binary_crossentropy',
            metrics=['accuracy'])
        return model


  from kerastuner import HyperModel


Initialize the Tuner and Start the Search

In [23]:
# Instantiate the hypermodel; each sample is a flat vector with one entry per feature column
hypermodel = MyHyperModel(input_shape=(X_train.shape[1],))

# Initialize the Random Search tuner.
# The seed makes the sequence of sampled hyperparameter sets repeatable; it
# reuses the value given to train_test_split so the notebook has one seed.
tuner = RandomSearch(
    hypermodel,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=2,
    seed=123,
    directory='my_dir',
    project_name='keras_tuner_demo')

# Perform hyperparameter tuning without the early stopping callback.
# validation_split=0.2 asks Keras to hold out 20% of the training data to
# compute the val_accuracy objective.
tuner.search(X_train, y_train, epochs=10, validation_split=0.2)

# After tuning, retrieve the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]




Trial 10 Complete [00h 14m 38s]
val_accuracy: 0.6420661807060242

Best val_accuracy So Far: 0.6638377010822296
Total elapsed time: 01h 52m 42s
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50


Trial 10 Complete [00h 14m 38s]

val_accuracy: 0.6420661807060242

Best val_accuracy So Far: 0.6638377010822296

Total elapsed time: 01h 52m 42s

In [None]:
# Build the model with the best hyperparameters
best_model = tuner.hypermodel.build(best_hps)

# Now, compile the best model with your optimizer and loss function
best_model.compile(optimizer=Adam(best_hps.get('learning_rate')), loss='binary_crossentropy', metrics=['accuracy'])

# `class_weights` was passed to fit() below without being defined anywhere in
# this notebook, so the cell failed with NameError on a fresh kernel.
# NOTE(review): balanced inverse-frequency weights are assumed here
# (n_samples / (n_classes * class_count)) - confirm this matches the
# weighting that was originally intended.
class_counts = y_train.value_counts()
class_weights = {
    int(label): len(y_train) / (len(class_counts) * count)
    for label, count in class_counts.items()
}

# Then, fit the model with the early stopping callback.
# Training stops after 3 epochs without val_loss improvement and the weights
# from the best epoch are restored.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = best_model.fit(X_train, y_train, epochs=50, validation_split=0.2, class_weight=class_weights, callbacks=[early_stopping])