# TODO:
* Tune keras just on vanilla dataset and see what's the best score we get
* Add original data to the mix with training on both datasets but validation only on competition dataset and see how good it performs
* Do feature engineering based on time feature and see how good that performs

# Important Note:
Now that I think about it, if we want to take the time features into account, we'll have to split the dataset for training and validation such that the validation comes after the training.
So i don't think we need Cross Validation for this one... Let's see

# Imports

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from pathlib import Path
import xgboost as xgb
import lightgbm as lgbm
import catboost
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import roc_auc_score
from IPython.display import display
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder
import optuna
from sklearn.preprocessing import StandardScaler
from scipy.linalg import norm

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import keras_tuner as kt

from category_encoders import LeaveOneOutEncoder

from IPython.display import display

# Loading Data

In [2]:
BASE_PATH = Path("/kaggle/input/playground-series-s3e4/")

train = pd.read_csv(BASE_PATH / "train.csv")
test = pd.read_csv(BASE_PATH / "test.csv")

original = pd.read_csv("/kaggle/input/creditcardfraud/creditcard.csv")

print(f"Training dataset has {len(train)} rows with " \
                    f"{sum(train.Class) / len(train) * 100 :.2}% fraud rows.")
print(f"Original dataset has {len(original)} rows with " \
                    f"{sum(original.Class) / len(original) * 100 :.2}% fraud rows.")

Training dataset has 219129 rows with 0.21% fraud rows.
Original dataset has 284807 rows with 0.17% fraud rows.


# Feature Engineering

In [3]:
train["Hour"] = ((train.Time // 3600) % 24).astype("category")
test["Hour"] = ((test.Time // 3600) % 24).astype("category")
original["Hour"] = ((original.Time // 3600) % 24).astype("category")

In [4]:
train["is_night"] = train.Hour.map(lambda x: int(x <= 6))
test["is_night"] = test.Hour.map(lambda x: int(x <= 6))
original["is_night"] = test.Hour.map(lambda x: int(x <= 6))

In [5]:
X = train.drop(columns=["id", "Class"])
y = train.Class

In [6]:
X_test = test.drop(columns="id")

In [7]:
aaa = np.arange(10)
ooo = np.arange(11, 20)
aaa, ooo

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([11, 12, 13, 14, 15, 16, 17, 18, 19]))

In [8]:
sc = StandardScaler()
sc.fit(aaa.reshape(-1, 1))
sc.transform(aaa.reshape(-1, 1)), sc.transform(ooo.reshape(-1, 1))

(array([[-1.5666989 ],
        [-1.21854359],
        [-0.87038828],
        [-0.52223297],
        [-0.17407766],
        [ 0.17407766],
        [ 0.52223297],
        [ 0.87038828],
        [ 1.21854359],
        [ 1.5666989 ]]),
 array([[2.26300953],
        [2.61116484],
        [2.95932015],
        [3.30747546],
        [3.65563078],
        [4.00378609],
        [4.3519414 ],
        [4.70009671],
        [5.04825202]]))

In [9]:
feats_to_scale = ["Time","Amount"]
feats_to_encode = ["Hour"]

X = pd.get_dummies(X)

sc = StandardScaler()
sc.fit(X[feats_to_scale])
X[feats_to_scale] = sc.transform(X[feats_to_scale])
X_test[feats_to_scale] = sc.transform(X_test[feats_to_scale])

In [10]:
X.shape

(219129, 55)

In [11]:
early_stopping = keras.callbacks.EarlyStopping(
                patience=5,
                min_delta=0.001,
                monitor="val_auc",
                restore_best_weights=True,
                )

reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                              patience=5, min_lr=0.001)

In [12]:
# Since this dataset contains a time axis, so we'll make sure we're validating on the future
# we'll use 20% of the data for validation
val_size = int(len(X) * 0.20)

X_train, y_train = X.iloc[:-val_size, :], y.iloc[:-val_size]
X_val, y_val = X.iloc[-val_size:, :], y.iloc[-val_size:]

### As a sanity check, let's verify that the validation data follows the training data

In [13]:
X_train.tail()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,Hour_14.0,Hour_15.0,Hour_16.0,Hour_17.0,Hour_18.0,Hour_19.0,Hour_20.0,Hour_21.0,Hour_22.0,Hour_23.0
175299,0.702903,-1.154928,0.162923,1.747028,0.991513,-0.723126,1.415901,-0.162637,0.852376,0.402856,...,0,0,0,0,0,0,0,0,1,0
175300,0.702903,-0.835293,1.172409,1.591367,-0.205889,-0.192612,-0.201252,0.492998,0.206925,-0.235648,...,0,0,0,0,0,0,0,0,1,0
175301,0.702903,-1.1459,1.037252,1.122627,-1.392541,0.360199,0.846446,-0.08797,0.558683,0.009814,...,0,0,0,0,0,0,0,0,1,0
175302,0.702903,1.276834,-0.147334,0.094576,-0.257049,-0.020925,0.407977,-0.503501,0.158691,0.382581,...,0,0,0,0,0,0,0,0,1,0
175303,0.702903,-0.673439,0.892115,1.69098,-1.353868,0.252316,-0.82944,1.012523,-0.259957,-0.431717,...,0,0,0,0,0,0,0,0,1,0


In [14]:
X_val.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,Hour_14.0,Hour_15.0,Hour_16.0,Hour_17.0,Hour_18.0,Hour_19.0,Hour_20.0,Hour_21.0,Hour_22.0,Hour_23.0
175304,0.702903,-0.983733,0.632487,1.829496,1.669713,-0.039582,1.001766,-0.481212,0.854339,-0.395769,...,0,0,0,0,0,0,0,0,1,0
175305,0.702903,0.941208,-0.390937,1.085723,1.26694,-0.689895,1.017845,-0.695058,0.417781,0.687824,...,0,0,0,0,0,0,0,0,1,0
175306,0.702903,1.225552,0.005515,-0.031295,-0.070925,0.035704,0.211637,-0.188959,0.176838,0.038258,...,0,0,0,0,0,0,0,0,1,0
175307,0.702903,0.880412,-0.717796,0.680402,0.064291,-0.888881,0.024663,-0.510728,0.176749,0.386618,...,0,0,0,0,0,0,0,0,1,0
175308,0.702903,-0.508916,0.767204,1.56733,0.540207,0.556598,0.677702,0.278064,0.320033,-0.068448,...,0,0,0,0,0,0,0,0,1,0


In [25]:
# def model_builder(hp):
#     inputs = layers.Input(shape=(55,))
    
#     hp_units_1 = hp.Int("units_1", min_value=512, max_value=2048, step=256)
#     x = layers.Dense(hp_units_1, activation="relu")(inputs)
#     x = layers.BatchNormalization()(x)
#     hp_dropout_1 = hp.Float("dropout_1", min_value=0, max_value=0.8, step=0.1)
#     x = layers.Dropout(hp_dropout_1)(x)
    
#     hp_units_2 = hp.Int("units_2", min_value=256, max_value=1024, step=128)
#     x = layers.Dense(hp_units_2, activation="relu")(x)
#     x = layers.BatchNormalization()(x)
#     hp_dropout_2 = hp.Float("dropout_2", min_value=0, max_value=0.8, step=0.1)
#     x = layers.Dropout(hp_dropout_2)(x)
    
#     hp_units_3 = hp.Int("units_3", min_value=128, max_value=512, step=64)
#     x = layers.Dense(hp_units_3, activation="relu")(x)
#     x = layers.BatchNormalization()(x)
#     hp_dropout_3 = hp.Float("dropout_3", min_value=0, max_value=0.8, step=0.1)
#     x = layers.Dropout(hp_dropout_3)(x)
    
#     hp_units_4 = hp.Int("units_4", min_value=64, max_value=256, step=32)
#     x = layers.Dense(hp_units_4, activation="relu")(x)
#     x = layers.BatchNormalization()(x)
#     hp_dropout_4 = hp.Float("dropout_4", min_value=0, max_value=0.8, step=0.1)
#     x = layers.Dropout(hp_dropout_4)(x)
    
#     hp_units_5 = hp.Int("units_5", min_value=32, max_value=128, step=16)
#     x = layers.Dense(hp_units_5, activation="relu")(x)
#     x = layers.BatchNormalization()(x)
#     hp_dropout_5 = hp.Float("dropout_5", min_value=0, max_value=0.8, step=0.1)
#     x = layers.Dropout(hp_dropout_5)(x)

#     outputs = layers.Dense(1, activation="sigmoid")(x)

#     keras_model = keras.Model(inputs=inputs, outputs=outputs)
    
#     hp_learning_rate = hp.Float("learning_rate", min_value=1e-05, max_value=1e-1, sampling="log")
#     hp_optimizer = hp.Choice("optimizer", ["rmsprop", "adam"])
    
#     if hp_optimizer == "adam":
#         optim = keras.optimizers.Adam(learning_rate=hp_learning_rate)
#     else:
#         optim = keras.optimizers.RMSprop(learning_rate=hp_learning_rate)
        
#     keras_model.compile(optimizer=optim,
#                        loss=keras.losses.binary_crossentropy,
#                        metrics=[keras.metrics.AUC()])
    
#     return keras_model

In [26]:
early_stopping = keras.callbacks.EarlyStopping(
                patience=5,
                min_delta=0.001,
                monitor="val_auc",
                restore_best_weights=True,
                )

reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                              patience=5, min_lr=0.001)

In [27]:
# tuner = kt.Hyperband(model_builder,
#                     objective=kt.Objective("val_auc", direction="max"),
#                     max_epochs=50,
#                     directory="./",
#                     project_name="tuning_keras",
#                     overwrite=True,
#                     )

In [28]:
tuner.search(X_train, y_train, epochs=50, validation_data=(X_val, y_val), batch_size=1024, callbacks=[early_stopping, reduce_lr])

Trial 90 Complete [00h 00m 09s]
val_auc: 0.48167747259140015

Best val_auc So Far: 0.8266593217849731
Total elapsed time: 00h 15m 42s


In [29]:
# best_hps_list = tuner.get_best_hyperparameters(num_trials=5)

In [30]:
# for i, best_hps in enumerate(best_hps_list):
#     print(f"{'-'*15} {i} {'-'*15}")
#     print(best_hps.values)

--------------- 0 ---------------
{'units_1': 1024, 'dropout_1': 0.8, 'units_2': 512, 'dropout_2': 0.1, 'units_3': 448, 'dropout_3': 0.6000000000000001, 'units_4': 160, 'dropout_4': 0.7000000000000001, 'units_5': 128, 'dropout_5': 0.30000000000000004, 'learning_rate': 0.05288468409542797, 'optimizer': 'adam', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 1, 'tuner/round': 1, 'tuner/trial_id': '0076'}
--------------- 1 ---------------
{'units_1': 1792, 'dropout_1': 0.5, 'units_2': 640, 'dropout_2': 0.6000000000000001, 'units_3': 256, 'dropout_3': 0.30000000000000004, 'units_4': 96, 'dropout_4': 0.4, 'units_5': 112, 'dropout_5': 0.1, 'learning_rate': 0.07922855932951119, 'optimizer': 'adam', 'tuner/epochs': 17, 'tuner/initial_epoch': 6, 'tuner/bracket': 3, 'tuner/round': 2, 'tuner/trial_id': '0035'}
--------------- 2 ---------------
{'units_1': 2048, 'dropout_1': 0.30000000000000004, 'units_2': 1024, 'dropout_2': 0.2, 'units_3': 320, 'dropout_3': 0.7000000000000001, 'un

# FineTuned Keras

In [15]:
inputs = layers.Input(shape=(55,))

x = layers.Dense(1024, activation="relu")(inputs)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.8)(x)

x = layers.Dense(512, activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.1)(x)

x = layers.Dense(448, activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.6)(x)

x = layers.Dense(160, activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.7)(x)

x = layers.Dense(128, activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)

outputs = layers.Dense(1, activation="sigmoid")(x)

keras_model = keras.Model(inputs=inputs, outputs=outputs)

optim = keras.optimizers.Adam(learning_rate=0.053)

keras_model.compile(optimizer=optim,
                   loss=keras.losses.binary_crossentropy,
                   metrics=[keras.metrics.AUC()])

2023-01-25 19:13:41.689588: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-25 19:13:41.694133: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-25 19:13:41.694892: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-25 19:13:41.696707: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [17]:
keras_model.fit(X_train, y_train, validation_data=(X_val, y_val), 
                batch_size=1024, callbacks=[early_stopping, reduce_lr],
               epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50


<keras.callbacks.History at 0x7f9960449250>

In [19]:
X_test = pd.get_dummies(X_test)

In [20]:
keras_model.predict(X_test)

ValueError: in user code:

    /opt/conda/lib/python3.7/site-packages/keras/engine/training.py:1586 predict_function  *
        return step_function(self, iterator)
    /opt/conda/lib/python3.7/site-packages/keras/engine/training.py:1576 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    /opt/conda/lib/python3.7/site-packages/keras/engine/training.py:1569 run_step  **
        outputs = model.predict_step(data)
    /opt/conda/lib/python3.7/site-packages/keras/engine/training.py:1537 predict_step
        return self(x, training=False)
    /opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py:1020 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /opt/conda/lib/python3.7/site-packages/keras/engine/input_spec.py:269 assert_input_compatibility
        ', found shape=' + display_shape(x.shape))

    ValueError: Input 0 is incompatible with layer model: expected shape=(None, 55), found shape=(None, 46)
