In [15]:
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer, OneHotEncoder, StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

### DataSource

In [19]:
class DataSource:
    
    def __init__(self, filename):
        self.df = pd.read_csv(filename)
        
    def data_load_split(self, target=None, ignore=None):
        self.target = target
        self.ignore = ignore or []
        self.inputs = sorted(set(self.df.columns) - set(self.target) - set(self.ignore))
        
        self.X = self.df[self.inputs]
        self.y = self.df[self.target]
        
        return self.X, self.y
    
    def define_problem(self):
        if self.y.dtypes[0] in ['int64', 'float64'] and self.y.nunique()[0] == 2:
            self.problem = "Binary"
        elif self.y.dtypes[0] in ['object', 'bool']:
            self.problem = "Classification"
        else:
            self.problem = "Regression"
    
        return self.problem
    
    def data_preprocess(self, X, y, problem="Regression"):

        # Data type detection
        numerical_ix = X.select_dtypes(include=['int64', 'float64']).columns
        categorical_ix = y.select_dtypes(include=['object', 'bool']).columns

        # Data transform
        num_transform = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())
        ])
        cat_transform = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='constant', fill_value="Missing")),
            ('oh_encoder', OneHotEncoder(sparse=False))
        ])

        self.transform_x = ColumnTransformer(transformers=[
            ('num', num_transform, numerical_ix),
            ('cat', cat_transform, categorical_ix)
        ])
        
        if problem == "Classification":
            self.transform_y = ColumnTransformer(transformers=[
                ('cat', cat_transform, y.columns)
            ])
        elif problem == "Regression":
            self.transform_y = ColumnTransformer(transformers=[
                ('num', MinMaxScaler(), y.columns)
            ])
        else:
            self.transform_y = ColumnTransformer(transformers=[
                ('num', Normalizer(), y.columns)
            ])
            
        trans_X = self.transform_x.fit_transform(X)
        trans_y = self.transform_y.fit_transform(y)

        return trans_X, trans_y
    
    def train_val_split(self, X, y, ratio=0.2, random_state=42):
        return train_test_split(X, y, test_size=ratio, random_state=random_state)

In [20]:
filename="./data/sberbank_train_reduced_10k.csv"
ds = DataSource(filename)

In [21]:
X, y = ds.data_load_split(target=['price_doc'],)
problem = ds.define_problem()
X_train, X_val, y_train, y_val = ds.train_val_split(X, y, ratio=0.2, random_state=42)

trans_X_train, trans_y_train = ds.data_preprocess(X_train, y_train, problem=problem)

In [22]:
trans_X_val = ds.transform_x.fit_transform(X_val)
trans_y_val = ds.transform_y.fit_transform(y_val)

In [32]:
trans_X_train.shape

(8000, 55)

In [33]:
trans_X_val.shape

(2000, 55)

### MLP

In [35]:
class MLP:
    
    def __init__(self, X, y, problem="Regression"):
        self.X = X
        self.y = y
        self.problem = problem
        tf.random.set_seed(42)
        
    def build_structure(self, max_hidden_layers=1, units=[16], use_all=False):
        self.structures = []
        self.structures_info = []
        if use_all:
            self.max_hidden_layers = 3
            self.units = [16, 32, 64, 128, 256]
        else:
            self.max_hidden_layers = max_hidden_layers
            self.units = units
    
        grid = [np.arange(self.max_hidden_layers)+1, self.units]
        for param_tuple in itertools.product(*grid):
            structure_param = {'hidden_layers': param_tuple[0],
                                'units': param_tuple[1]}

            # input layer
            model = keras.Sequential()
            model.add(keras.layers.Dense(16, input_shape=(self.X.shape[1],)))

            # hidden layer block
            for _ in range(structure_param['hidden_layers']):
                model.add(keras.layers.Dense(structure_param['units'], activation='relu'))

            # output layer
            if problem == 'Regression':
                model.add(keras.layers.Dense(1))
            elif problem == 'Binary':
                model.add(keras.layers.Dense(1, activation='sigmoid'))
            else:
                model.add(keras.layers.Dense(self.y.shape[1], activation='softmax'))

            self.structures.append(model)
            self.structures_info.append(structure_param)

        return self.structures, self.structures_info
    
    def create_optimizer(self, optimizers=['adam'], lrs=[0.01], use_all=False):
        self.created_optimizers = []
        self.optimizers_info = []
        self.optimizers = optimizers
        self.lrs = lrs

        self.optimizer_classes = {'adadelta': keras.optimizers.Adadelta, 'sgd': keras.optimizers.SGD,
                                  'adam': keras.optimizers.Adam, 'adagrad': keras.optimizers.Adagrad,
                                  'adamax': keras.optimizers.Adamax, 'rmsprop': keras.optimizers.RMSprop}

        if use_all:
            self.lrs = [0.001, 0.01, 0.02, 0.1]
            opt_grid = [self.optimizer_classes.keys(), self.lrs]    
        else:
            opt_grid = [self.optimizers, self.lrs]

        for opt_tuple in itertools.product(*opt_grid):
            opt_param = {
                'optimizer_name': opt_tuple[0],
                'lr': opt_tuple[1]
            }

            opt_class = self.optimizer_classes.get(opt_param['optimizer_name'])
            self.created_optimizers.append(opt_class(opt_param['lr']))
            self.optimizers_info.append(opt_param)

        return self.created_optimizers, self.optimizers_info
    
    def _compile_model(self):
        if self.problem == "Regression":
            self.loss = keras.losses.MSE
            self.metrics = ['MSE', 'MAE']
        elif self.problem == "Binary":
            self.loss = keras.losses.binary_crossentropy
            self.metrics = ['accuracy']
        else:
            self.loss = keras.losses.categorical_crossentropy
            self.metrics = ['accuracy']

        self.compiled_models = []
        self.compiled_models_info = []

        compile_grid = [zip(self.structures, self.structures_info), zip(self.created_optimizers ,self.optimizers_info)]
        for compile_tuple in itertools.product(*compile_grid):
            compile_param = {'model': compile_tuple[0][0],
                             'optimizer': compile_tuple[1][0]}
            model_info = {'structure_info': compile_tuple[0][1],
                           'optimizer_info': compile_tuple[1][1]}

            model_body = compile_param['model']
            model = keras.models.clone_model(model_body)
            model.compile(optimizer=compile_param['optimizer'],
                               loss=self.loss,
                               metrics=self.metrics)

            self.compiled_models.append(model)
            self.compiled_models_info.append(model_info)

        return self.compiled_models, self.compiled_models_info
    
    def train_models(self, models, X_train, y_train, X_val=None, y_val=None,
                     batch_size=None, epochs=1, verbose=0, callbacks=None,
                     shuffle=True, steps_per_epoch=None):
        self.X_train = X_train
        self.y_train = y_train
        self.X_val = X_val
        self.y_val = y_val

        self.trained_models = []
        self.val_losses = []
        for model in models:
            model.fit(x=self.X_train, y=self.y_train,
                      batch_size=batch_size, epochs=epochs,
                      verbose=verbose, callbacks=callbacks,
                      validation_data=(self.X_val, self.y_val), shuffle=shuffle)
            
            val_loss = model.evaluate(self.X_val, self.y_val, verbose=0)
            self.trained_models.append(model)
            self.val_losses.append(val_loss[0])
            print("{} model is trained. best val loss is : {}".format(model.name, val_loss))
            
        return self.trained_models
    
    
def select_best_model(trained_models, val_losses, models_info):
    best_idx = np.argmin(val_losses)
    best_model = trained_models[best_idx]
    best_model_info = compiled_models_info[best_idx]
    return best_model, best_model_info

In [37]:
mlp = MLP(X=trans_X_train, y=trans_y_train, problem=problem)
structures, structures_info = mlp.build_structure(max_hidden_layers=2, units=[16, 32], use_all=True)
created_optimizers, optimizers_info = mlp.create_optimizer(optimizers=['adam', 'adadelta'], lrs=[0.1, 0.2], use_all=True)
compiled_models, compiled_models_info = mlp._compile_model()

callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

trained_models = mlp.train_models(compiled_models,
                                  trans_X_train, trans_y_train, trans_X_val, trans_y_val,
                                  batch_size=64, epochs=10,
                                  callbacks=[callback])

sequential_23 model is trained. best val loss is : [1.5426938471462488, 1.5426938476577219, 0.8028030406532123]
sequential_23 model is trained. best val loss is : [0.26997738047184844, 0.26997738027059, 0.37271916786639686]
sequential_23 model is trained. best val loss is : [0.12815186297477993, 0.12815186303487006, 0.2822619974671887]
sequential_23 model is trained. best val loss is : [0.013596636490647357, 0.013596636491647657, 0.08489661350371575]
sequential_23 model is trained. best val loss is : [0.04099347772264143, 0.0409934776279976, 0.15922668849975]
sequential_23 model is trained. best val loss is : [0.007558380631618061, 0.007558380629053843, 0.06510916424883029]
sequential_23 model is trained. best val loss is : [0.004840169854230481, 0.004840169847452984, 0.0481988723034546]
sequential_23 model is trained. best val loss is : [0.0026536231393056962, 0.0026536231355129285, 0.03089922631252854]
sequential_23 model is trained. best val loss is : [0.0030110384427745112, 0.00301

sequential_25 model is trained. best val loss is : [0.004411258896369763, 0.004411258904715025, 0.04009342484330096]
sequential_26 model is trained. best val loss is : [0.7247727075601585, 0.724772704673542, 0.3970877446503417]
sequential_26 model is trained. best val loss is : [0.021806385936429747, 0.021806385951075837, 0.11047905182946656]
sequential_26 model is trained. best val loss is : [0.013194154681685109, 0.013194154680957493, 0.08642695759725866]
sequential_26 model is trained. best val loss is : [0.0031794058093479274, 0.0031794058037693355, 0.037229485771574845]
sequential_26 model is trained. best val loss is : [0.01717804102402407, 0.017178041006730763, 0.09508371687411851]
sequential_26 model is trained. best val loss is : [0.004415199049132456, 0.004415199066191099, 0.04505953244472417]
sequential_26 model is trained. best val loss is : [0.0037626994551605247, 0.003762699468530455, 0.041426723858651336]
sequential_26 model is trained. best val loss is : [0.002379054644

sequential_28 model is trained. best val loss is : [0.0018423491347727815, 0.0018423491396005627, 0.026014947108670493]
sequential_28 model is trained. best val loss is : [0.002399900435104983, 0.0023999004327134076, 0.02719680291893536]
sequential_28 model is trained. best val loss is : [0.004942157237688339, 0.004942157240773376, 0.04050996736458455]
sequential_29 model is trained. best val loss is : [0.06715753095174039, 0.06715753092828342, 0.2074537263157805]
sequential_29 model is trained. best val loss is : [0.03062579623946249, 0.030625796167079677, 0.1285154956616974]
sequential_29 model is trained. best val loss is : [0.03250585514965607, 0.032505855107696664, 0.1402695687302912]
sequential_29 model is trained. best val loss is : [0.004443128858753335, 0.004443128852016515, 0.04559612607839477]
sequential_29 model is trained. best val loss is : [0.01922836695342112, 0.019228366959818104, 0.10502555863256091]
sequential_29 model is trained. best val loss is : [0.00699310612851

sequential_31 model is trained. best val loss is : [0.0027548176640156026, 0.0027548176442357955, 0.03296896372923923]
sequential_31 model is trained. best val loss is : [0.0018011611908823576, 0.0018011611876488714, 0.024475471990571998]
sequential_31 model is trained. best val loss is : [0.0020996766971701987, 0.0020996767050072917, 0.02688963640192672]
sequential_31 model is trained. best val loss is : [0.0036851696834721, 0.0036851696896951867, 0.03947075853466162]
sequential_31 model is trained. best val loss is : [0.004407820582241836, 0.0044078205403502, 0.040110723274476436]
sequential_32 model is trained. best val loss is : [0.009935713756323869, 0.009935713763736245, 0.07459258143859604]
sequential_32 model is trained. best val loss is : [0.003931676362744588, 0.00393167634357788, 0.04301147721324341]
sequential_32 model is trained. best val loss is : [0.0029743539062256495, 0.0029743539071651628, 0.03493562132455395]
sequential_32 model is trained. best val loss is : [0.0025

sequential_34 model is trained. best val loss is : [0.002036486947557898, 0.0020364869498291113, 0.027105158695012694]
sequential_34 model is trained. best val loss is : [0.0020057337847863704, 0.0020057337881339817, 0.026813949635163373]
sequential_34 model is trained. best val loss is : [0.0019531729972667097, 0.001953172999523731, 0.02771074154130418]
sequential_34 model is trained. best val loss is : [0.001977269620064292, 0.0019772696196685676, 0.02550871566887787]
sequential_34 model is trained. best val loss is : [0.0021336145059811383, 0.0021336145109141463, 0.025564541451711466]
sequential_34 model is trained. best val loss is : [0.0029841960773115445, 0.002984196078934526, 0.0351417306840124]
sequential_34 model is trained. best val loss is : [0.005854560408327293, 0.0058545603861054505, 0.04676303360954193]
sequential_35 model is trained. best val loss is : [0.02820921091460523, 0.02820921087211466, 0.1255094821123345]
sequential_35 model is trained. best val loss is : [0.00

sequential_37 model is trained. best val loss is : [0.001939350528267565, 0.001939350534173659, 0.024042195686408082]
sequential_37 model is trained. best val loss is : [0.0021038861199230803, 0.002103886110677654, 0.026225918022994243]
sequential_37 model is trained. best val loss is : [0.0017598167278849736, 0.0017598167254962516, 0.024552199203830383]
sequential_37 model is trained. best val loss is : [0.001861719940259997, 0.0018617199387171482, 0.023992456695258093]
sequential_37 model is trained. best val loss is : [0.0030891601493566855, 0.003089160144767361, 0.03805363646616729]
sequential_37 model is trained. best val loss is : [0.001911016814159407, 0.001911016824788317, 0.023740891206744788]
sequential_37 model is trained. best val loss is : [0.0024192765440076004, 0.002419276530020051, 0.028735235181155824]
sequential_37 model is trained. best val loss is : [0.004508070582353881, 0.0045080705845170896, 0.04405487537802289]
sequential_37 model is trained. best val loss is : 

In [38]:
for i in range(len(trained_models)):
    print(trained_models[i].optimizer)

<tensorflow.python.keras.optimizer_v2.adadelta.Adadelta object at 0x14dd20c90>
<tensorflow.python.keras.optimizer_v2.adadelta.Adadelta object at 0x149b1f810>
<tensorflow.python.keras.optimizer_v2.adadelta.Adadelta object at 0x149b1f6d0>
<tensorflow.python.keras.optimizer_v2.adadelta.Adadelta object at 0x149b1f9d0>
<tensorflow.python.keras.optimizer_v2.gradient_descent.SGD object at 0x149b1f750>
<tensorflow.python.keras.optimizer_v2.gradient_descent.SGD object at 0x149ad9ed0>
<tensorflow.python.keras.optimizer_v2.gradient_descent.SGD object at 0x149b2acd0>
<tensorflow.python.keras.optimizer_v2.gradient_descent.SGD object at 0x149b2ac50>
<tensorflow.python.keras.optimizer_v2.adam.Adam object at 0x149b32250>
<tensorflow.python.keras.optimizer_v2.adam.Adam object at 0x149ad9690>
<tensorflow.python.keras.optimizer_v2.adam.Adam object at 0x149ad9510>
<tensorflow.python.keras.optimizer_v2.adam.Adam object at 0x149b32550>
<tensorflow.python.keras.optimizer_v2.adagrad.Adagrad object at 0x149b32

<tensorflow.python.keras.optimizer_v2.adagrad.Adagrad object at 0x149b32390>
<tensorflow.python.keras.optimizer_v2.adagrad.Adagrad object at 0x149b32190>
<tensorflow.python.keras.optimizer_v2.adagrad.Adagrad object at 0x149b32a90>
<tensorflow.python.keras.optimizer_v2.adagrad.Adagrad object at 0x149b32ad0>
<tensorflow.python.keras.optimizer_v2.adamax.Adamax object at 0x149b32b10>
<tensorflow.python.keras.optimizer_v2.adamax.Adamax object at 0x149b32b50>
<tensorflow.python.keras.optimizer_v2.adamax.Adamax object at 0x149b32b90>
<tensorflow.python.keras.optimizer_v2.adamax.Adamax object at 0x149b32bd0>
<tensorflow.python.keras.optimizer_v2.rmsprop.RMSprop object at 0x149b32c10>
<tensorflow.python.keras.optimizer_v2.rmsprop.RMSprop object at 0x149b32c50>
<tensorflow.python.keras.optimizer_v2.rmsprop.RMSprop object at 0x149b32c90>
<tensorflow.python.keras.optimizer_v2.rmsprop.RMSprop object at 0x149b32cd0>
<tensorflow.python.keras.optimizer_v2.adadelta.Adadelta object at 0x14dd20c90>
<tens

In [39]:
best_model, best_model_info = select_best_model(trained_models, mlp.val_losses, compiled_models_info)

In [41]:
best_model.evaluate(trans_X_val, trans_y_val)



[0.0017354692378375707, 0.0017354692421585052, 0.024223466035665105]

In [42]:
best_model_info

{'structure_info': {'hidden_layers': 3, 'units': 128},
 'optimizer_info': {'optimizer_name': 'adamax', 'lr': 0.02}}