In [67]:
import pickle
import pandas as pd
import numpy as np
import lightgbm as lgb
import time
import tensorflow as tf
import tensorflow.keras.backend as K

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PowerTransformer, StandardScaler, MinMaxScaler
from sklearn.compose import TransformedTargetRegressor
from sklearn.model_selection import KFold

from tensorflow.keras import Sequential, Input, losses
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.regularizers import l1_l2

from bayes_opt import BayesianOptimization, SequentialDomainReductionTransformer

pd.set_option('display.max_columns', None)

In [68]:
from importlib import reload
import run_models, output, process_df
from run_models import create_and_fit_regression_rf, create_and_fit_regression_lgb, create_and_fit_regression_nn, create_regression_lgb, build_nn_model, get_regression_cv_metrics, get_regression_cv_predictions
from output import output_metrics, print_train_history, print_metrics, print_graphs, print_formatted, print_output, print_formatted_params
from process_df import split_df
reload(run_models)
reload(output)
reload(process_df)
from run_models import create_and_fit_regression_rf, create_and_fit_regression_lgb, create_and_fit_regression_nn, create_regression_lgb, build_nn_model, get_regression_cv_metrics, get_regression_cv_predictions
from output import output_metrics, print_train_history, print_metrics, print_graphs, print_formatted, print_output, print_formatted_params
from process_df import split_df

In [69]:
# Tag feature columns singled out for special handling; the only visible
# consumers are the commented-out column filter in the data-loading cell
# and the LaTeX item printer.
tags = [
    f'tag__{suffix}'
    for suffix in (
        'murder',
        'violence',
        'flashback',
        'romantic',
        'cult',
        'revenge',
        'psychedelic',
        'comedy',
        'suspenseful',
        'good_versus_evil',
        'humor',
        'entertaining',
        'neo_noir',
        'action',
        'boring',
        'other',
    )
]

In [70]:
# name = 'outliers/final/df'
# with open(f'{name}_data.pickle', 'rb') as handle:
#     data = pickle.load(handle)

# X_train, y_train, X_val, y_val, X_test, y_test = data['X_train'], data['y_train'], data['X_val'], data['y_val'], data['X_test'], data['y_test']
# X = pd.concat([X_train, X_test, X_val])
# y = np.concatenate([y_train, y_test, y_val])

# with open(f'{name}_process.pickle', 'rb') as handle:
#     process = pickle.load(handle)

In [71]:
# df = pd.read_csv('outliers/final/df.csv', index_col='id')

# def get_df_work_columns(df, df_columns):
#     return df[[col for col in df_columns if not 'META' in col or col == 'META__revenue']]

# df = get_df_work_columns(df, df)

# from sklearn.impute import KNNImputer

# imputer = KNNImputer(n_neighbors=30, weights='distance')
# nan_filled = df.copy()
# nan_filled[:] = imputer.fit_transform(df)

# nan_filled.to_csv(f'outliers/final/nan_filled.csv')

In [72]:
# dd = pd.read_csv(f'outliers/final/df.csv', index_col='id')
# cols = [c for c in dd.columns if not 'META' in c or 'revenue' in c]
# just_split_data = split_df(dd[cols])
# with open(f'outliers/final/df_raw_data.pickle', 'wb') as handle:
#     pickle.dump(just_split_data, handle)

In [73]:
# df = pd.read_csv('outliers/final/df.csv', index_col='id')

In [74]:
# missing_values = {}
# df_len = df.shape[0]
# for c in df.columns:
#     if 'META__' not in c:
#         len_missing = len([i for i in df[c] if pd.isna(i)])
#         if len_missing:
#             missing_values[c] = [len_missing, len_missing/df_len]

In [75]:
# sorted_miss = {k: v[1] for k, v in sorted(missing_values.items(), key=lambda item: item[1][1], reverse=True)}

In [76]:
# fig, ax = plt.subplots(figsize=(20,20))
# label = list(sorted_miss.keys())
# values = list(sorted_miss.values())
# ax.barh(label, values)
# for i in range(len(values)):
#     plt.text(values[i] + 0.01, i, round(values[i], 2))
# ax.set_xlabel('Percentage of missing data')
# plt.show()

In [77]:
# len([c for c in X.columns if 'tag' in c])

In [78]:
# # for t in [c for c in X.columns if 'tag' in c]:
# for t in tags:
#     print('\t\t\t\\item',t.replace('_', '\_'))

In [79]:
# Load the pre-split dataset from disk.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# it on pickles produced by this project.
with open('outliers/final/nan_filled_data.pickle', 'rb') as handle:
    data = pickle.load(handle)

X_train, X_val, X_test = data['X_train'], data['X_val'], data['X_test']
y_train, y_val, y_test = data['y_train'], data['y_val'], data['y_test']

# Pool the three splits for cross-validation. Features and targets are
# concatenated in the same order (train, test, val) so rows stay aligned.
X = pd.concat([X_train, X_test, X_val])
# X = X[[c for c in X.columns if 'tag' not in c or c in tags]]
y = np.concatenate([y_train, y_test, y_val])

In [80]:
# Fine-tune the NN regressor with Bayesian optimization
def bayes_parameter_opt_nn_regression(X, y, init_round=15, opt_round=30, n_folds=10, patience=10, validation_size=0.05, verbose=0):
    """Run a Bayesian hyper-parameter search for the NN regressor.

    Each candidate is scored by cross-validation through
    ``get_regression_cv_metrics``; the objective is the negated CV SMAPE
    because ``BayesianOptimization.maximize`` maximises its target.

    Args:
        X, y: Features and targets forwarded to the cross-validation helper.
        init_round: Number of initial exploration points (``init_points``).
        opt_round: Number of optimisation iterations (``n_iter``).
        n_folds: Passed to the CV helper as ``n_splits``.
        patience: Passed through to the CV helper.
        validation_size: Passed through to the CV helper.
        verbose: Passed through to the CV helper.

    Returns:
        dict with the best ``target`` and its ``params``, plus the
        ``bounds_transformer`` and the ``optimizer`` instance themselves.
    """
    def nn_eval(**params_raw):
        # Map the raw continuous samples to model parameters before scoring.
        params = convert_nn_params(params_raw)
        print(params)
        cv_result = get_regression_cv_metrics(
            create_and_fit_nn_regressor_with_params, X, y,
            n_splits=n_folds,
            patience=patience,
            validation_size=validation_size,
            regressor_params=params,
            verbose=verbose,
        )
        # Lower SMAPE is better, the optimiser maximises -> flip the sign.
        return -cv_result['cv_metrics']['smape']

    # Search space. Disabled candidates kept for reference:
    # 'adamax_beta_1': (0.8, 1), 'adamax_beta_2': (0.95, 1), 'batch_size': (4, 1024)
    pbounds = {'adamax_learning_rate': (0.0005, 0.1)}
    per_layer_bounds = (
        ('neurons', (64, 1024)),
        ('activation', (0, 4)),
        ('kernel_regularizer_l1', (0, 0.05)),
        ('kernel_regularizer_l2', (0, 0.05)),
        ('dropout', (0.1, 0.8)),
    )
    for layer in (1, 2):
        for suffix, bounds in per_layer_bounds:
            pbounds[f'l{layer}_{suffix}'] = bounds

    bounds_transformer = SequentialDomainReductionTransformer()
    optimizer = BayesianOptimization(
        f=nn_eval,
        pbounds=pbounds,
        random_state=0,
        bounds_transformer=bounds_transformer,
    )
    optimizer.maximize(init_points=init_round, n_iter=opt_round)

    # Pick the trial with the highest target (i.e. the lowest SMAPE).
    trials = optimizer.res
    targets = [trial['target'] for trial in trials]
    best_trial = trials[pd.Series(targets).idxmax()]
    return {
        'target': best_trial['target'],
        'params': best_trial['params'],
        'bounds_transformer': bounds_transformer,
        'optimizer': optimizer,
    }

In [81]:
def print_formatted(obj):
    """Print every entry of ``obj`` as a ``key=value,`` line.

    Floats are rounded to 3 decimals and strings are wrapped in single
    quotes, so the output can be pasted back as keyword arguments.

    NOTE(review): a function with this name is also imported from the
    ``output`` module in the import cell; this definition shadows it.
    """
    def _render(value):
        if isinstance(value, str):
            return f"'{value}'"
        if isinstance(value, float):
            return round(value, 3)
        return value

    for key in obj:
        print(f'{key}={_render(obj[key])},')

def print_output(obj):
    """Print every entry of ``obj`` as ``name: value`` with the value rounded to 3 decimals."""
    for name in obj:
        rounded = round(obj[name], 3)
        print(f'{name}: {rounded}')
   
def print_formatted_params(params):
    """Convert raw optimiser parameters with ``convert_nn_params`` and pretty-print the result."""
    converted = convert_nn_params(params)
    print_formatted(converted)

In [87]:
def create_and_fit_nn_regressor_with_params(X, y, X_val=None, y_val=None, patience=30, regressor_params={}, verbose=0, validation_split=None):
    """Fit the Keras regressor inside a power-transform pipeline and return it.

    The model is a ``TransformedTargetRegressor`` wrapping a ``Pipeline`` of
    ``PowerTransformer`` -> ``KerasRegressor``; the target is power-transformed
    as well. Training uses early stopping on ``val_keras_mape``.

    Args:
        X: Training features (2-D; ``X.shape[1]`` sets the network input size).
        y: Training targets.
        X_val, y_val: Optional explicit validation set. Must be given together.
        patience: Early-stopping patience in epochs.
        regressor_params: Hyper-parameters forwarded to
            ``build_nn_model_with_params``. The ``{}`` default is shared
            between calls; it is never mutated here.
        verbose: Keras fit verbosity.
        validation_split: Fraction of the training data Keras holds out for
            validation when no explicit validation set is given.

    Returns:
        The fitted ``TransformedTargetRegressor``.

    Raises:
        ValueError: If only one of ``X_val`` / ``y_val`` is provided.
    """
    start = time.time()
    fit_params = {
        'model__epochs': 10000,
        'model__shuffle': True,
        'model__use_multiprocessing': True,
        'model__verbose': verbose,
        # 'model__batch_size': regressor_params.pop('batch_size'),
        'model__batch_size': 16,
        # 'model__batch_size': X.shape[0],
        'model__callbacks': [EarlyStopping(monitor='val_keras_mape', mode='min', verbose=1, patience=patience)],
    }
    if X_val is None and y_val is None:
        # Keras splits the (already transformed) training arrays itself.
        fit_params['model__validation_split'] = validation_split
    elif X_val is None or y_val is None:
        raise ValueError('X_val and y_val must be provided together')
    else:
        # Fit parameters are handed to the Keras step unchanged, so the
        # validation set never passes through the pipeline's feature transformer
        # or the target transformer. Apply equivalent transforms here so that
        # validation metrics (and early stopping) are computed on the same scale
        # the network is trained on. PowerTransformer is deterministic, so
        # fitting it on the same training data reproduces the pipeline's
        # transform, at the cost of one extra fit.
        feature_transformer = PowerTransformer().fit(X)
        y_train_arr = np.asarray(y)
        if y_train_arr.ndim == 1:
            y_train_arr = y_train_arr.reshape(-1, 1)
        target_transformer = PowerTransformer().fit(y_train_arr)

        y_val_arr = np.asarray(y_val)
        if y_val_arr.ndim == 1:
            y_val_arr = y_val_arr.reshape(-1, 1)
        y_val_transformed = target_transformer.transform(y_val_arr)
        # Mirror TransformedTargetRegressor, which squeezes a single-column
        # target back to 1-D before fitting the regressor.
        if y_val_transformed.ndim == 2 and y_val_transformed.shape[1] == 1:
            y_val_transformed = y_val_transformed.squeeze(axis=1)

        fit_params['model__validation_data'] = (
            feature_transformer.transform(X_val),
            y_val_transformed,
        )

    # Keyword arguments: `transformer` is keyword-only in current scikit-learn.
    model = TransformedTargetRegressor(
        regressor=Pipeline([
            ('powertransform', PowerTransformer()),
            ('model', KerasRegressor(build_nn_model_with_params([X.shape[1]], regressor_params))),
        ]),
        transformer=PowerTransformer(),
    ).fit(X, y, **fit_params)
    end = time.time()
    print(f'model fit time: {end - start}')
    return model


def build_nn_model_with_params(input_shape, params):
    """Return a zero-argument factory that builds and compiles the Keras model.

    The network has two hidden ``Dense`` + ``Dropout`` stages followed by a
    single linear output unit, trained with MSE loss and the Adamax optimiser.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first Dense layer.
        params: Optional mapping of hyper-parameters. Recognised keys are
            ``adamax_learning_rate`` and, for each hidden layer ``l1``/``l2``,
            ``l<N>_neurons``, ``l<N>_activation`` and ``l<N>_dropout``. Missing
            keys (or an empty / ``None`` mapping) fall back to the previously
            hard-coded values, so existing callers get the same network.

    Returns:
        A callable with no arguments that returns a compiled ``Sequential``
        model (the form expected by ``KerasRegressor``).
    """
    params = params or {}
    learning_rate = params.get('adamax_learning_rate', 0.001)
    # Defaults per hidden layer: (units, activation, dropout rate).
    hidden_defaults = {1: (256, 'sigmoid', 0.1), 2: (256, 'sigmoid', 0.5)}
    hidden_spec = [
        (
            params.get(f'l{idx}_neurons', units),
            params.get(f'l{idx}_activation', activation),
            params.get(f'l{idx}_dropout', dropout),
        )
        for idx, (units, activation, dropout) in hidden_defaults.items()
    ]

    def keras_wape(y_true, y_pred):
        """Weighted absolute percentage error: 100 * sum|error| / sum|target|."""
        if not tf.is_tensor(y_pred):
            y_pred = K.constant(y_pred)
        y_true = K.cast(y_true, y_pred.dtype)
        total_abs_error = K.sum(K.abs(y_pred - y_true))
        # Normalise by the summed absolute targets (clipped to avoid a zero
        # denominator). Dividing by y_true.shape[1] instead would only divide
        # by the number of output columns.
        total_abs_target = K.clip(K.sum(K.abs(y_true)), K.epsilon(), None)
        return 100. * total_abs_error / total_abs_target

    def keras_mape(y_true, y_pred):
        """Mean absolute percentage error; monitored by early stopping as ``val_keras_mape``."""
        if not tf.is_tensor(y_pred):
            y_pred = K.constant(y_pred)
        y_true = K.cast(y_true, y_pred.dtype)
        return 100. * K.mean(K.abs((y_true - y_pred) / K.clip(K.abs(y_true),K.epsilon(),None)), axis=-1)

    def get_model():
        adamax = Adamax(learning_rate=learning_rate, beta_1=0.958, beta_2=0.987)

        layers = []
        for position, (units, activation, dropout) in enumerate(hidden_spec):
            dense_kwargs = {
                'activation': activation,
                'kernel_initializer': 'glorot_normal',
            }
            if position == 0:
                # Only the first layer declares the input shape.
                dense_kwargs['input_shape'] = input_shape
            layers.append(Dense(units, **dense_kwargs))
            layers.append(Dropout(dropout))
        layers.append(Dense(1, kernel_initializer='glorot_normal'))
        model = Sequential(layers)

        model.compile(
            # Alternatives tried earlier: MSLE, MAPE, MAE, cosine similarity,
            # Huber, log-cosh.
            loss=losses.MeanSquaredError(),
            optimizer=adamax,
            metrics=['mae', 'mse', keras_mape, keras_wape],
        )
        return model
    return get_model

In [284]:
# 10-split cross-validation of the NN regressor with early-stopping patience 5.
# NOTE(review): this call passes `model_params` / `validation_split`, while the
# tuning helpers in this notebook pass `regressor_params` / `validation_size`
# to the same function - confirm which keywords get_regression_cv_metrics
# actually forwards to the fit function.
cv_result = get_regression_cv_metrics(create_and_fit_nn_regressor_with_params, X, y,
        n_splits=10, patience=5, model_params={}, verbose=1, validation_split=0.05)

poch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 00013: early stopping
model fit time: 10.685550928115845
split 6
Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 00006: early stopping
model fit time: 5.883486270904541
split 7
Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 00015: early stopping
model fit time: 11.40432596206665
split 8
Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 00007: early stopping
model fit time: 5.523204326629639
split 9
Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12

In [285]:
# Show the aggregated cross-validation metrics of the most recent `cv_result`.
print_output(cv_result['cv_metrics'])

smape: 67.08
wmape: 43.736
mape: 187.707
mae: 32972996.9
rmse: 74499387.7
adj_r2: 0.608


In [32]:
# Same 10-split cross-validation, but with early-stopping patience 30.
# NOTE(review): overwrites `cv_result`; the execution counter of this cell is
# lower than that of the patience-5 run, so the cells were not run top to bottom.
cv_result = get_regression_cv_metrics(create_and_fit_nn_regressor_with_params, X, y,
        n_splits=10, patience=30, model_params={}, verbose=1, validation_split=0.05)

 mse: 0.2864 - val_loss: 0.2578 - val_mae: 0.3844 - val_mse: 0.2578
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 16/10000
Epoch 17/10000
Epoch 18/10000
Epoch 19/10000
Epoch 20/10000
Epoch 21/10000
Epoch 22/10000
Epoch 23/10000
Epoch 24/10000
Epoch 25/10000
Epoch 26/10000
Epoch 27/10000
Epoch 28/10000
Epoch 29/10000
Epoch 30/10000
Epoch 31/10000
Epoch 32/10000
Epoch 33/10000
Epoch 34/10000
Epoch 35/10000
Epoch 36/10000
Epoch 37/10000
Epoch 38/10000
Epoch 39/10000
Epoch 40/10000
Epoch 41/10000
Epoch 42/10000
Epoch 43/10000
Epoch 44/10000
Epoch 45/10000
Epoch 46/10000
Epoch 47/10000
Epoch 48/10000
Epoch 49/10000
Epoch 50/10000
Epoch 51/10000
Epoch 52/10000
Epoch 53/10000
Epoch 54/10000
Epoch 55/10000
Epoch 56/10000
Epoch 57/10000
Epoch 58/10000
Epoch 59/10000
Epoch 60/10000
Epoch 61/10000
Epoch 62/10000
Epoch 63/10000
Epoch 64/10000
Epoch 65/10000
Epoch 66/10000
Epoch 0

In [35]:
# Show the aggregated cross-validation metrics of the most recent `cv_result`.
print_output(cv_result['cv_metrics'])

smape: 63.93
mape: 187.995
mae: 31762673.1
rmse: 73387374.9
adj_r2: 0.594


In [126]:
def convert_nn_params(params):
    """Map raw optimiser samples to model hyper-parameters.

    Currently a stub: ``params`` is ignored and a fresh empty dict is
    returned, so every caller gets no overrides. The disabled conversion
    logic is kept below for reference.
    """
    return dict()
    # get_float = lambda val: max(min(val, 1), 0)
    # get_int = lambda val: int(round(val))
    # activations = ['relu', 'elu', 'selu', 'sigmoid', 'tanh']
    # get_activation = lambda i: activations[max(min(get_int(i), 4), 0)]

    # return_params = {
    #     'adamax_learning_rate': params['adamax_learning_rate'],
    #     # 'adamax_beta_1': get_float(params['adamax_beta_1']),
    #     # 'adamax_beta_2': get_float(params['adamax_beta_2']),
    #     # 'batch_size': get_int(params['batch_size']),
    #     # 'hidden_layers': max(min(get_int(params['hidden_layers'], 4), 0),
    # }
    # for l in range(1, 3):
    #     return_params[f'l{l}_neurons'] = get_int(params[f'l{l}_neurons'])
    #     return_params[f'l{l}_activation'] = get_activation(params[f'l{l}_activation'])
    #     # return_params[f'l{l}_kernel_regularizer_l1'] = get_float(params[f'l{l}_kernel_regularizer_l1'])
    #     # return_params[f'l{l}_kernel_regularizer_l2'] = get_float(params[f'l{l}_kernel_regularizer_l2'])
    #     return_params[f'l{l}_dropout'] = get_float(params[f'l{l}_dropout'])

    # return return_params

In [124]:
# Fine tune the NN regressor with hyperas
from hyperopt import Trials, STATUS_OK, tpe, rand
from hyperas import optim
from hyperas.distributions import choice, uniform

def get_data():
    """Load the pickled splits and pool them into one feature frame X and one target array y."""
    # NOTE: this function is handed to hyperas as its data provider; keep the
    # names X and y and the single-line final statement unchanged.
    with open('outliers/final/nan_filled_data.pickle', 'rb') as handle:
        data = pickle.load(handle)

    # Same order (train, test, val) for features and targets keeps rows aligned.
    X = pd.concat([data['X_train'], data['X_test'], data['X_val']])
    y = np.concatenate([data['y_train'], data['y_test'], data['y_val']])
    return X, y

def get_nn_hyperas_model(X, y):
    """Objective function for the hyperas search.

    Cross-validates the NN regressor (10 splits, patience 5) and reports the
    CV SMAPE as the loss to minimise.

    The search-space declarations below are commented out and params_raw is
    an empty dict, so every evaluation currently uses the same parameters.
    """
    # Disabled hyperas search-space template (kept for reference):
    # adamax_learning_rate={{uniform(0.0001, 0.001)}}
    # # adamax_beta_1={{uniform(0.8, 1)}}
    # # adamax_beta_2={{uniform(0.95, 1)}}
    # # batch_size={{uniform(4, 1024)}}
    # l1_neurons={{uniform(64, 1024)}}
    # l1_activation={{uniform(0, 4)}}
    # # l1_kernel_regularizer_l1={{uniform(0, 0.05)}}
    # # l1_kernel_regularizer_l2={{uniform(0, 0.05)}}
    # l1_dropout={{uniform(0, 0.8)}}
    # l2_neurons={{uniform(64, 1024)}}
    # l2_activation={{uniform(0, 4)}}
    # # l2_kernel_regularizer_l1={{uniform(0, 0.05)}}
    # # l2_kernel_regularizer_l2={{uniform(0, 0.05)}}
    # l2_dropout={{uniform(0, 0.8)}}
   
    params_raw = {}
    #     'adamax_learning_rate': adamax_learning_rate,
    #     # 'adamax_beta_1': adamax_beta_1,
    #     # 'adamax_beta_2': adamax_beta_2,
    #     'l1_neurons': l1_neurons,
    #     'l1_activation': l1_activation,
    #     # 'l1_kernel_regularizer_l1': l1_kernel_regularizer_l1,
    #     # 'l1_kernel_regularizer_l2': l1_kernel_regularizer_l2,
    #     'l1_dropout': l1_dropout,
    #     'l2_neurons': l2_neurons,
    #     'l2_activation': l2_activation,
    #     # 'l2_kernel_regularizer_l1': l2_kernel_regularizer_l1,
    #     # 'l2_kernel_regularizer_l2': l2_kernel_regularizer_l2,
    #     'l2_dropout': l2_dropout,
    # }
    params = convert_nn_params(params_raw)
    print(params)
    # NOTE(review): uses `validation_size` / `regressor_params`, whereas the
    # manual CV cells pass `validation_split` / `model_params` - confirm which
    # keywords get_regression_cv_metrics expects.
    cv_result = get_regression_cv_metrics(create_and_fit_nn_regressor_with_params, X, y,
        n_splits=10, patience=5, validation_size=0.05, regressor_params=params, verbose=1)
    # hyperopt minimises 'loss', so SMAPE is reported as-is (no sign flip).
    return {'loss': cv_result['cv_metrics']['smape'], 'status': STATUS_OK}

In [125]:
from sklearn.model_selection import KFold

# Run the hyperas search: up to 30 TPE evaluations of get_nn_hyperas_model on
# the data returned by get_data.
# Pre-initialise the result names so they exist even if the search is interrupted.
best_run = None
best_model = None
space = None
trials=Trials()
# `functions` lists the notebook-level helpers that the objective calls.
# NOTE(review): `notebook_name` presumably has to match this notebook's file
# name - confirm against the hyperas documentation.
best_run, best_model, space = optim.minimize(model=get_nn_hyperas_model,
                                      data=get_data,
                                      algo=tpe.suggest,
                                      max_evals=30,
                                      trials=trials,
                                      notebook_name='regression_nn',
                                      eval_space=True,
                                      return_space=True,
                                      functions=[convert_nn_params, get_regression_cv_metrics, create_and_fit_nn_regressor_with_params, build_nn_model_with_params])

, patience=30, regressor_params=None, verbose=0):
  51:     start = time.time()
  52:     print(f'x shape {X.shape[0]}')
  53:     fit_params = {
  54:         'model__epochs': 10000,
  55:         'model__shuffle': True,
  56:         'model__use_multiprocessing': True,
  57:         'model__verbose': verbose,
  58:         'model__validation_data': (X_val, y_val),
  59:         # 'model__batch_size': regressor_params.pop('batch_size'),
  60:         # 'model__batch_size': 32,
  61:         'model__batch_size': X.shape[0],
  62:         'model__callbacks': [EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=patience)],
  63:     }
  64:     model = TransformedTargetRegressor(
  65:         Pipeline([
  66:             ('powertransform', PowerTransformer()),
  67:             ('model', KerasRegressor(build_nn_model_with_params([X.shape[1]], regressor_params))),
  68:         ]),
  69:         PowerTransformer(),
  70:     ).fit(X, y, **fit_params)
  71:     end = time.ti

KeyboardInterrupt: 

In [252]:
# def create_regression_nn(input_shape):
#     pass

# def create_and_fit_nn_regressor(X, y, X_val, y_val, patience=30, regressor_params={}):
#     es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=patience)
#     start = time.time()
#     model = TransformedTargetRegressor(
#         Pipeline([
#             ('powertransform', PowerTransformer()),
#             ('model', KerasRegressor(build_nn_model([X_train.shape[1]], loss=losses.MeanAbsoluteError()))),
#         ]),
#         PowerTransformer(),
#     ).fit(X_train.values, y_train,
#         model__epochs=10000, 
#         model__validation_data=(X_val.values, y_val),
#         model__verbose=0,
#         model__batch_size=256,
#         model__shuffle=True,
#         model__callbacks=[es],
#     )
#     end = time.time()
#     print(f'model fit time: {end - start}')
#     return model

In [None]:
%%time
# Bayesian search: 2 initial points + 50 optimisation iterations, each scored
# with 10-fold cross-validation.
# NOTE(review): convert_nn_params currently returns an empty dict, so the
# sampled parameters are discarded before the model is built - confirm that
# this search is meant to run in that state.
nn_optimized = bayes_parameter_opt_nn_regression(X, y, init_round=2, opt_round=50, n_folds=10, verbose=0, patience=10, validation_size=0.05)