In [1]:
import os
import json
from utils import load_datasets, load_target, save_submission
import models
from models.tuning import beyesian_optimization
from models.evaluation import cross_validation_score
from lightgbm.sklearn import LGBMClassifier
from sklearn.ensemble import StackingClassifier, RandomForestClassifier, ExtraTreesClassifier, VotingClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
import json
from keras.wrappers.scikit_learn  import KerasClassifier
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPClassifier
from keras.models import Sequential, load_model
from keras.utils import np_utils
from keras.callbacks import EarlyStopping
from keras.layers.advanced_activations import PReLU
from keras.layers.core import Activation, Dense, Dropout
from tensorflow.keras.layers import BatchNormalization

ModuleNotFoundError: No module named 'utils'

In [2]:
config = json.load(open('./config/default.json'))
# X_train, X_test = load_datasets(["Age", "AgeSplit", "EducationNum"])
X_train, X_test = load_datasets(config['features'])
y_train = load_target('Y')

In [3]:
def nn_shallow_fn():
    """
    This function compiles and returns a Keras model.
    Should be passed to KerasClassifier in the Keras scikit-learn API.
    """
    layers=4
    dropout=0.1
    units=1000
    model = Sequential()
    model.add(Dense(units, input_shape=(X_train.shape[1], )))
    model.add(PReLU())
    model.add(BatchNormalization())
    model.add(Dropout(dropout))

    for l in range(layers - 1):
        model.add(Dense(units))
        model.add(PReLU())
        model.add(BatchNormalization())
        model.add(Dropout(dropout))

    model.add(Dense(2))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adadelta', metrics=['accuracy'])

    return model

def nn_deep_fn():
    """
    This function compiles and returns a Keras model.
    Should be passed to KerasClassifier in the Keras scikit-learn API.
    """
    layers=10
    dropout=0.1
    units=1000
    model = Sequential()
    model.add(Dense(units, input_shape=(X_train.shape[1], )))
    model.add(PReLU())
    model.add(BatchNormalization())
    model.add(Dropout(dropout))

    for l in range(layers - 1):
        model.add(Dense(units))
        model.add(PReLU())
        model.add(BatchNormalization())
        model.add(Dropout(dropout))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adagrad', metrics=['accuracy'])

    return model

In [14]:
estimators = [
    ('lgbm-shallow', LGBMClassifier(max_depth=5, random_state=0)),
    ('lgbm-middle', LGBMClassifier(max_depth=8, random_state=0)),
    ('lgbm-deep', LGBMClassifier(max_depth=-1, random_state=0)),
    ('rf', RandomForestClassifier(random_state=0, n_jobs=-1)),
    ('ert', ExtraTreesClassifier(random_state=0, n_jobs=-1)),
    ('ridge', RidgeClassifier(random_state=0)),
    ('nn-shallow',  make_pipeline(StandardScaler(), 
                                 KerasClassifier(build_fn=nn_shallow_fn, batch_size=128, epochs=1000))),
    ('nn-shallow',  make_pipeline(StandardScaler(),
                                 KerasClassifier(build_fn=nn_deep_fn, batch_size=128, epochs=1000)))
   
]
final_estimator = VotingClassifier(
    estimators=[
        ('lgbm-shallow', LGBMClassifier(max_depth=5, random_state=0)),
        ('lgbm-middle', LGBMClassifier(max_depth=8, random_state=0)),
        ('lgbm-deep', LGBMClassifier(max_depth=-1, random_state=0)),
        ('rf', RandomForestClassifier(random_state=0, n_jobs=-1)),
        ('ert', ExtraTreesClassifier(random_state=0, n_jobs=-1)),
        ('ridge', RidgeClassifier(random_state=0)),
        ('nn-shallow',  make_pipeline(StandardScaler(), 
                                 KerasClassifier(build_fn=nn_shallow_fn, batch_size=128, epochs=1000))),
        ('nn-shallow',  make_pipeline(StandardScaler(),
                                 KerasClassifier(build_fn=nn_deep_fn, batch_size=128, epochs=1000)))
    ],
    voting='hard',
    n_jobs=-1
)


In [8]:
model = StackingClassifier(
    estimators=estimators,
    final_estimator=final_estimator,
    n_jobs=-1
)
cv_score = cross_val_score(model, X_train, y_train, n_jobs=-1, verbose=2, cv=StratifiedKFold(n_splits=5, random_state=0, shuffle=True))
print(cv_score)
print(np.mean(cv_score))

[nan nan nan nan nan]
nan


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.1s finished


In [15]:
for model in estimators:
    model=model[1]
    print(model)
    cv_score = cross_val_score(model, X_train, y_train, n_jobs=-1, verbose=2, cv=StratifiedKFold(n_splits=5, random_state=0, shuffle=True))
    print(cv_score)
    print(np.mean(cv_score))

LGBMClassifier(max_depth=5, random_state=0)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    0.2s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


[0.88186275 0.87009804 0.8745098  0.88284314 0.88284314]
0.8784313725490197
LGBMClassifier(max_depth=8, random_state=0)


[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    0.3s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


[0.88382353 0.86666667 0.87303922 0.88235294 0.88627451]
0.8784313725490197
LGBMClassifier(random_state=0)


[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    0.3s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


[0.87990196 0.86470588 0.8745098  0.88578431 0.88382353]
0.8777450980392156
RandomForestClassifier(n_jobs=-1, random_state=0)


[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    1.0s remaining:    1.6s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    1.2s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    1.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


[0.87303922 0.86078431 0.87009804 0.87598039 0.87303922]
0.8705882352941178
ExtraTreesClassifier(n_jobs=-1, random_state=0)


[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    1.2s remaining:    1.7s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    1.2s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    1.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


[0.86813725 0.8504902  0.86372549 0.87205882 0.8627451 ]
0.8634313725490197
RidgeClassifier(random_state=0)
[0.87598039 0.86323529 0.8745098  0.875      0.875     ]
0.8727450980392156
Pipeline(steps=[('standardscaler', StandardScaler()),
                ('kerasclassifier',
                 <keras.wrappers.scikit_learn.KerasClassifier object at 0x7fbb378b22b0>)])
[nan nan nan nan nan]
nan
Pipeline(steps=[('standardscaler', StandardScaler()),
                ('kerasclassifier',
                 <keras.wrappers.scikit_learn.KerasClassifier object at 0x7fbb378b2358>)])
[nan nan nan nan nan]
nan


[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.1s finished


In [32]:
cv_score = cross_val_score(KerasClassifier(build_fn=nn_deep_fn, batch_size=32, epochs=1000, verbose=2)
                           , X_train, y_train, n_jobs=-1, verbose=2, cv=StratifiedKFold(n_splits=5, random_state=0, shuffle=True))
print(cv_score)
print(np.mean(cv_score))

[nan nan nan nan nan]
nan


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.1s finished


In [4]:
KerasClassifier(build_fn=nn_deep_fn, batch_size=32, epochs=1000, verbose=2).fit(X_train, y_train)

  KerasClassifier(build_fn=nn_deep_fn, batch_size=32, epochs=1000, verbose=2).fit(X_train, y_train)
2022-02-25 10:54:33.896517: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-02-25 10:54:33.900367: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-02-25 10:54:33.903791: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB

Epoch 1/1000


2022-02-25 10:54:35.857208: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


319/319 - 23s - loss: 0.7130 - accuracy: 0.6312 - 23s/epoch - 72ms/step
Epoch 2/1000
319/319 - 20s - loss: 0.5903 - accuracy: 0.7174 - 20s/epoch - 64ms/step
Epoch 3/1000
319/319 - 20s - loss: 0.5455 - accuracy: 0.7474 - 20s/epoch - 62ms/step
Epoch 4/1000
319/319 - 18s - loss: 0.5183 - accuracy: 0.7709 - 18s/epoch - 57ms/step
Epoch 5/1000
319/319 - 20s - loss: 0.5098 - accuracy: 0.7663 - 20s/epoch - 62ms/step
Epoch 6/1000
319/319 - 19s - loss: 0.4999 - accuracy: 0.7689 - 19s/epoch - 60ms/step
Epoch 7/1000
319/319 - 19s - loss: 0.4737 - accuracy: 0.7817 - 19s/epoch - 59ms/step
Epoch 8/1000
319/319 - 18s - loss: 0.4687 - accuracy: 0.7833 - 18s/epoch - 58ms/step
Epoch 9/1000
319/319 - 19s - loss: 0.4580 - accuracy: 0.7920 - 19s/epoch - 59ms/step
Epoch 10/1000
319/319 - 19s - loss: 0.4453 - accuracy: 0.8007 - 19s/epoch - 59ms/step
Epoch 11/1000
319/319 - 19s - loss: 0.4346 - accuracy: 0.8031 - 19s/epoch - 60ms/step
Epoch 12/1000
319/319 - 20s - loss: 0.4285 - accuracy: 0.8063 - 20s/epoch - 

<keras.callbacks.History at 0x7f9367435580>