In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import warnings
# Silence every Python warning for the rest of the notebook session.
warnings.filterwarnings("ignore")
# Let pandas print up to 101 rows / 101 columns before truncating a display.
pd.set_option('display.max_rows', 101)
pd.set_option('display.max_columns', 101)

import os

import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout,BatchNormalization
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras import backend as K
import kerastuner as kt

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from bayes_opt import BayesianOptimization
from skopt import BayesSearchCV

In [None]:
# Load the competition train and test tables; the "id" column becomes the row index.
train = pd.read_csv(
    "../input/tabular-playground-series-nov-2021/train.csv",
    index_col="id",
)
test = pd.read_csv(
    "../input/tabular-playground-series-nov-2021/test.csv",
    index_col="id",
)

In [None]:
# Every column except the last is a feature; the last column is the label.
X, y = train.iloc[:, :-1], train.iloc[:, -1]

In [None]:
# Standardise the features: learn the statistics on the training features,
# then apply the same transform to the training features and to the test table.
# NOTE(review): the scaler is fit on all of X before the hold-out / CV splits
# are made further down, so validation rows contribute to the scaling statistics.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)
X_test = scaler.transform(test)

In [None]:
# Reset Keras' global state before any model is built.
tf.keras.backend.clear_session()

In [None]:
# Report the TensorFlow version and the GPUs TensorFlow can see.
print('TF version:', tf.__version__)
gpu_devices = tf.config.list_physical_devices('GPU')
print('GPU devices:', gpu_devices)

In [None]:
# Ask TensorFlow's native logging to hide INFO and WARNING messages.
# NOTE(review): tensorflow has already been imported and queried in earlier
# cells; this variable is normally read when TensorFlow's native library loads,
# so setting it here probably has no effect -- confirm, and consider moving it
# above `import tensorflow`.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [None]:
# Hold out 20% of the rows for validation, keeping the label ratio in both parts.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
def build_model(hp, input_dim=100):
    """Build and compile a tunable fully-connected binary classifier.

    The network is a stack of ``Dense(swish) -> BatchNormalization -> Dropout``
    groups (one input group plus ``num_layers`` further groups) followed by a
    single sigmoid output unit. It is compiled with Adam and binary
    cross-entropy, tracking accuracy.

    Hyperparameters registered on ``hp``:
        units_<i>        width of group i, 32..256 in steps of 32
        dropout_<i>      dropout rate of group i, 0.0..0.7
        num_layers       number of groups after the input group, 1..3
        lr               Adam learning rate, 1e-5..1e-1, sampled on a log scale
        label_smoothing  label smoothing of the loss, 0.0..0.1

    Args:
        hp: Keras Tuner hyperparameter container providing ``Int`` and ``Float``.
        input_dim: number of input features. Defaults to 100, the value that
            was previously hard-coded, so ``build_model(hp)`` behaves as before.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    # Input group: the only layer that needs to know the feature count.
    model.add(Dense(hp.Int('units_0', min_value=32, max_value=256, step=32),
                    activation="swish", input_dim=input_dim))
    model.add(BatchNormalization())
    model.add(Dropout(hp.Float('dropout_0', min_value=0., max_value=0.7)))

    # Between one and three further groups, each with its own width and dropout.
    n_extra_groups = hp.Int('num_layers', min_value=1, max_value=3, step=1)
    for group in range(1, n_extra_groups + 1):
        model.add(Dense(hp.Int('units_' + str(group), min_value=32, max_value=256, step=32),
                        activation="swish"))
        model.add(BatchNormalization())
        model.add(Dropout(hp.Float('dropout_' + str(group), min_value=0., max_value=0.7)))

    model.add(Dense(1, activation='sigmoid'))

    # The learning-rate range spans four orders of magnitude. With linear
    # sampling about 99% of the range lies above 1e-3, so small rates would
    # almost never be tried; log sampling covers every decade evenly.
    learning_rate = hp.Float('lr', min_value=0.00001, max_value=0.1, sampling='log')
    label_smoothing = hp.Float('label_smoothing', min_value=0.0, max_value=0.1)

    model.compile(optimizer=Adam(learning_rate),
                  loss=BinaryCrossentropy(label_smoothing=label_smoothing),
                  metrics=['accuracy'])
    return model


In [None]:
# Stop a run once validation accuracy has not improved for 10 epochs and
# roll the model back to its best weights.
es = EarlyStopping(
    monitor="val_accuracy",
    mode="max",
    patience=10,
    restore_best_weights=True,
)

# Bayesian search over build_model's hyperparameters, maximising validation
# accuracy on the hold-out split; up to 100 trials of at most 100 epochs each.
tuner = kt.BayesianOptimization(
    build_model,
    objective=kt.Objective('val_accuracy', direction='max'),
    max_trials=100,
    overwrite=True,
)
tuner.search(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[es],
    batch_size=1024,
)

In [None]:
# Show the hyperparameter values of the single best trial as the cell output.
best_trial = tuner.oracle.get_best_trials(num_trials=1)[0]
best_trial.hyperparameters.values

In [None]:
# Build a model from the best hyperparameters found by the search and
# print its layer summary.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
model = tuner.hypermodel.build(best_hps)
model.summary()

In [None]:
# Train the best configuration on the hold-out split. The learning rate is
# multiplied by 0.2 whenever validation loss has stalled for 7 epochs, and
# the early-stopping callback defined for the search is reused.
reduce_lr = ReduceLROnPlateau(monitor="val_loss", mode="min", factor=0.2, patience=7)
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=100,
    batch_size=1024,
    callbacks=[es, reduce_lr],
)

In [None]:
# Report the 1-based epoch at which validation accuracy first peaked.
val_acc_per_epoch = history.history['val_accuracy']
best_epoch = max(range(len(val_acc_per_epoch)), key=val_acc_per_epoch.__getitem__) + 1
print(f"Best epoch: {best_epoch}")

In [None]:
# Score the trained model on the hold-out split; the first returned value
# (the loss) is discarded and the accuracy is printed as a percentage.
_, acc = model.evaluate(X_valid, y_valid)
accuracy_pct = acc * 100
print("Accuracy", accuracy_pct, "%")

In [None]:
# Gather the per-epoch curves of the final fit into one table, renaming the
# training metrics so that every column carries a train_/val_ prefix.
df_eval = pd.DataFrame(history.history).rename(
    columns={'loss': 'train_loss', 'accuracy': 'train_accuracy'}
)[['train_loss', 'val_loss', 'train_accuracy', 'val_accuracy']]

# Training vs. validation loss per epoch.
loss_curves = df_eval[["train_loss", "val_loss"]]
plt.plot(loss_curves, label=["Train","Valid"])
plt.legend()
plt.title("Loss")

In [None]:
# Training vs. validation accuracy per epoch for the final fit.
accuracy_curves = df_eval[["train_accuracy", "val_accuracy"]]
plt.plot(accuracy_curves, label=["Train","Valid"])
plt.title("Accuracy")
plt.legend()

In [None]:
from sklearn.metrics import confusion_matrix

# Flatten the model's single-unit sigmoid outputs to a 1-D vector and
# threshold at 0.5 to obtain hard class predictions for the hold-out split.
pred_valid = model.predict(X_valid).ravel()
cm = confusion_matrix(y_valid, pred_valid > 0.5)

# fmt="d" prints the exact integer counts; seaborn's default ".2g" would
# round them to two significant digits (e.g. 4.5e+04).
ax = sns.heatmap(cm, annot=True, fmt="d")
# scikit-learn puts the true labels on rows and the predicted labels on columns.
ax.set(xlabel="Predicted label", ylabel="True label");

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_curve, auc, roc_auc_score

# 5-fold stratified cross-validation of the best hyperparameter configuration.
# Each fold trains its own model and contributes one set of test predictions.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

scores = {fold: None for fold in range(cv.n_splits)}   # fold -> Keras history dict
predictions = []                                       # one test-prediction vector per fold

# cv.split yields positional indices. y is a pandas Series indexed by "id",
# so plain y[idx] would look rows up by label; convert once to an array so
# the lookup is positional, matching X.
y_values = np.asarray(y)

for fold, (idx_train, idx_valid) in enumerate(cv.split(X, y)):
    # Fold-local names: the notebook's hold-out split (X_train, X_valid, ...)
    # and the final `model` / `history` are left untouched.
    X_fold_train, y_fold_train = X[idx_train], y_values[idx_train]
    X_fold_valid, y_fold_valid = X[idx_valid], y_values[idx_valid]

    # Build a fresh model for every fold. Re-fitting one shared model would
    # carry weights (and a decayed learning rate) over from previous folds,
    # i.e. from rows that are now in the validation fold, inflating the scores.
    fold_model = tuner.hypermodel.build(best_hps)

    fold_es = EarlyStopping(monitor="val_accuracy", mode="max", patience=10,
                            restore_best_weights=True)
    fold_reduce_lr = ReduceLROnPlateau(monitor="val_loss", mode="min", factor=0.2, patience=7)

    fold_history = fold_model.fit(X_fold_train, y_fold_train,
                                  validation_data=(X_fold_valid, y_fold_valid),
                                  epochs=100, batch_size=1024,
                                  callbacks=[fold_es, fold_reduce_lr],
                                  verbose=0)

    scores[fold] = fold_history.history

    # The tracked metric is accuracy (the model is compiled with
    # metrics=['accuracy']), so it is reported as accuracy, not AUC.
    print(f"Fold {fold} -- Max Validation Accuracy: {np.max(scores[fold]['val_accuracy'])}")

    predictions.append(fold_model.predict(X_test, batch_size=1024).ravel())

print('**'*20)

fold_best_accuracy = [np.max(scores[fold]['val_accuracy']) for fold in range(cv.n_splits)]
print('Overall Mean Accuracy: ', np.mean(fold_best_accuracy))

In [None]:
# One figure per CV fold: loss curves on the left, accuracy curves on the right.
# Iterating over `scores` keeps the fold count in sync with the CV setup
# instead of repeating the number of splits here.
for fold, fold_history in scores.items():
    # The tracked metric is accuracy, so the columns are named accordingly.
    # A fold-local frame avoids overwriting the notebook-level df_eval.
    fold_eval = pd.DataFrame({'train_loss': fold_history['loss'],
                              'val_loss': fold_history['val_loss'],
                              'train_accuracy': fold_history['accuracy'],
                              'val_accuracy': fold_history['val_accuracy']})

    fig, (ax_loss, ax_acc) = plt.subplots(1, 2, tight_layout=True, figsize=(10, 4))
    fig.suptitle('Fold : ' + str(fold), fontsize=14)

    # Draw on the axes returned by plt.subplots rather than re-selecting
    # them through the pyplot state machine.
    ax_loss.plot(fold_eval[["train_loss", "val_loss"]], label=["Train", "Valid"])
    ax_loss.legend()
    ax_loss.set_title("Loss")

    ax_acc.plot(fold_eval[["train_accuracy", "val_accuracy"]], label=["Train", "Valid"])
    ax_acc.set_title("Accuracy")
    ax_acc.legend()

In [None]:
# Average the test predictions across folds and write the submission file.
sample_submission = pd.read_csv("../input/tabular-playground-series-nov-2021/sample_submission.csv")
fold_predictions = np.column_stack(predictions)  # one column per fold
sample_submission['target'] = fold_predictions.mean(axis=1)
sample_submission.to_csv('./nn_model.csv', index=False)

In [None]:
# Display the submission table that was just written, as the cell output.
sample_submission