In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.sparse import reorder
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
def nn_model(optimizer='adam', dropout=0.5, init='uniform', dense_nparams=256):
    """Build and compile a small feed-forward binary classifier.

    The network is two ReLU hidden layers (``dense_nparams`` units, then 16
    units), each followed by a dropout layer, and a single sigmoid output unit.

    Parameters
    ----------
    optimizer : optimizer passed to ``model.compile``.
    dropout : rate passed to both ``Dropout`` layers.
    init : ``kernel_initializer`` used by both hidden ``Dense`` layers.
    dense_nparams : number of units in the first hidden layer.

    Returns
    -------
    The compiled ``Sequential`` model (binary cross-entropy loss, accuracy
    metric).
    """
    network = Sequential()

    # Hidden stack: every ReLU layer is immediately followed by dropout.
    for width in (dense_nparams, 16):
        network.add(Dense(width, activation='relu', kernel_initializer=init))
        network.add(Dropout(dropout))

    # Single sigmoid unit for the binary target.
    network.add(Dense(1, activation='sigmoid'))

    network.compile(
        loss='binary_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'],
    )
    return network

class ArrayTransformer():
    """Stateless pipeline step that converts sparse input to a dense array.

    It is meant to sit between a step that emits a sparse matrix and a step
    that needs dense input. Nothing is learned from the data, so ``fit`` is a
    no-op.
    """

    def transform(self, X, **transform_params):
        """Return ``X`` in dense form.

        If ``X`` exposes a callable ``toarray`` attribute, its result is
        returned. Any other input is assumed to be dense already and is
        returned unchanged instead of raising ``AttributeError``.
        """
        to_dense = getattr(X, 'toarray', None)
        if callable(to_dense):
            return to_dense()
        # No ``toarray`` available: treat the input as already dense.
        return X

    def fit(self, X, y=None, **fit_params):
        """No-op fit; returns ``self`` so calls can be chained."""
        return self

    def fit_transform(self, X, y=None, **fit_params):
        """Fit (a no-op) and return the dense form of ``X`` in one call."""
        return self.fit(X, y, **fit_params).transform(X)
    
# Early-stopping callback shared by the Keras fits in this notebook.
# It monitors the held-out loss (`val_loss`) rather than the training loss:
# training loss usually keeps decreasing, so it is a poor stopping signal,
# whereas a stalled validation loss indicates the model has stopped
# generalising. This requires validation data at fit time; the grid-search
# fit in this notebook passes a validation split.
# Training stops after 3 consecutive epochs without any improvement.
early_stop = EarlyStopping(monitor='val_loss',
                           min_delta=0,
                           patience=3,
                           mode='auto')

In [None]:
# Search space: only the width of the first hidden layer of `nn_model` varies.
pipe_params = {'nn__dense_nparams': [256, 512, 1024]}

# Three chained steps: TF-IDF features -> dense array -> Keras classifier.
tfid_nn = Pipeline(steps=[
    (
        'tfid',
        # NOTE(review): min_df and max_df are both the integer 3. If integer
        # values are absolute document counts (as in scikit-learn's docs),
        # only terms occurring in exactly 3 documents survive -- confirm this
        # is intended (a float max_df would be a proportion instead).
        TfidfVectorizer(
            max_features=8000,
            min_df=3,
            max_df=3,
            ngram_range=(1, 2),
        ),
    ),
    ('array', ArrayTransformer()),
    (
        'nn',
        KerasClassifier(
            build_fn=nn_model,
            verbose=1,
            epochs=50,
            batch_size=1000,
            validation_batch_size=1000,
        ),
    ),
])

# 5-fold cross-validated grid search over `pipe_params`, scored by ROC AUC,
# using all available cores.
gs_tfid_nn = GridSearchCV(
    estimator=tfid_nn,
    param_grid=pipe_params,
    cv=5,
    verbose=1,
    n_jobs=-1,
    scoring='roc_auc',
)

# The `nn__` prefix targets the 'nn' pipeline step: each fit holds out 20% of
# its training data for validation and uses the early-stopping callback.
gs_tfid_nn.fit(
    X_train,
    y_train,
    nn__validation_split=0.2,
    nn__callbacks=[early_stop],
)

print(f'Best Parameters: {gs_tfid_nn.best_params_}')

In [None]:
# Score of the fitted grid search on the held-out test set (the search was
# configured with scoring='roc_auc').
print(gs_tfid_nn.score(X_test, y_test))

In [None]:
# Matthews correlation coefficient of the test-set predictions.
print(matthews_corrcoef(y_test, gs_tfid_nn.predict(X_test)))