In [23]:
import tensorflow as tf
keras = tf.keras
layers = tf.keras.layers
initializers = tf.keras.initializers

from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error as mse
import matplotlib.pyplot as plt
import numpy as np
import pickle
import os
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor

from data_generator import generator
from encoding.layers import IntegerBaseEncoder, FloatBaseEncoder

In [18]:
class _MLPBlock(keras.layers.Layer):
    def __init__(self, width, droprate=0, regularization=keras.regularizers.L1L2(0), **kwargs):
        super(_MLPBlock, self).__init__()
        self.dense = layers.Dense(width, 
                                  kernel_initializer=initializers.RandomNormal(seed=seed),
                                  kernel_regularizer=regularization, **kwargs)
#         self.activation = layers.ReLU()
#         self.drop = layers.Dropout(droprate)
#         self.bn = layers.BatchNormalization()
        
    def call(self, inputs, **kwargs):
        x = self.dense(inputs)
#         x = self.activation(x)
#         x = self.drop(x)
#         x = self.bn(x)
        return x

# implement policy network
class MLP(keras.Model):
    def __init__(self, input_dim, output_dim, hidden_dim=64, depth=1, **kwargs):
        super().__init__()
        self.depth = depth
        self.hidden_0 = _MLPBlock(hidden_dim, input_shape=(input_dim,), **kwargs)

        for i in range(1, depth):
            setattr(self, f'hidden_{i}', _MLPBlock(hidden_dim))
        self.out = _MLPBlock(output_dim)

    
    def call(self, inputs):
        x = inputs
        for i in range(self.depth):
            x = getattr(self, f'hidden_{i}')(x)
        x = self.out(x)
        return x

In [19]:
seed_list = [1,2,3,4,5]
dist_list = ['normal', 'lognormal', 'exponential']
n_samples = 1000
n_features = 1
mean = 1
std = 0.6
epochs = 100

In [20]:
standard = lambda x:(x - x.mean())/x.std()

# transform_layer = FloatBaseEncoder()
# transform_layer_only_integers = FloatBaseEncoder(only_integers=True)

# tranformations = {'intact': lambda x: x,
#                   'standardization': standard,
#                   'binary explosion': lambda x: transform_layer(x).squeeze(1),
#                   'binary explosion o/i': lambda x: transform_layer_only_integers(x).squeeze(1),
#                   'higher dimensionality': lambda x, n=5: np.power(x, np.arange(1, n))}

In [21]:
x_array = []
y_array = []
for dist_name in dist_list:
    exec(f'x_{dist_name}={{}}\ny_{dist_name}={{}}')
    x_array.append(f'x_{dist_name}')
    y_array.append(f'y_{dist_name}')
    for seed in seed_list:
        tf.random.set_seed(seed)
        gen = generator.DataGenerator(mean=mean, std=std, dist=dist_name, is_int=False)
        x, y = gen.generate(n_features=n_features, n_samples=n_samples)
        x = np.ravel(x)
        order = np.argsort(x).reshape(-1, 1)
        x, y = x[order], y[order]
        exec(f'x_{dist_name}[seed]=x\ny_{dist_name}[seed]=y')

In [24]:
# fig, ax = plt.subplots(len(x_array), len(y_array), figsize=(16, 9))
# fig.tight_layout()
filename = "results/full_experiment_1.pkl"
os.makedirs(os.path.dirname(filename), exist_ok=True)
activation = activation=keras.activations.sigmoid
# transform_layer = IntegerBaseEncoder()
parameters = {}
results = []
for only_integers in [True, False]:
    for encode_sign in [True, False]:   
        for base in range(2,17):


            transform_layer = FloatBaseEncoder(base=base, encode_sign=encode_sign, column_width=32)
            transform_layer_only_integers = FloatBaseEncoder(base=base, encode_sign=encode_sign, column_width=32, only_integers=True)
            tranformations = {'intact': lambda x: x,
                            'standardization': standard,
                            'binary explosion': lambda x: transform_layer(x).squeeze(1),
                            'binary explosion o/i': lambda x: transform_layer_only_integers(x).squeeze(1),
                            'higher dimensionality': lambda x, n=5: np.power(x, np.arange(1, n))}


            for idx, x_name in enumerate(x_array):
                for idy, y_name in enumerate(y_array):            
                    for name_transformation, func_transformation in tranformations.items():
                        
                        result_batch = []
                        parameters = {
                            'seed_list' : seed_list,
                            'dist_list' : dist_list,
                            'n_samples' : n_samples,
                            'n_features' : n_features,
                            'mean' : mean,
                            'std' : std,
                            'epochs' : epochs,
                            'only_integers' : only_integers,
                            'encode_sign' : encode_sign,
                            'base' : base,
                            'transformation' : name_transformation,
                            'datasets' : f'{x_name}/{y_name}'
                        }
                        
                        result_batch.append(parameters)
                        
                        history = np.zeros(epochs)
                        for seed in seed_list:
                            cur_x, cur_y = eval(x_name+'[seed]'), eval(y_name+'[seed]')
                            transformed_x = func_transformation(cur_x)
                            try:
                                model = MLP(transformed_x.shape[1], 1, 128, depth=2, activation=activation)
                                model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-4), loss='mse')
                                batch = model.fit(transformed_x, cur_y, epochs=epochs, verbose=0)
                                # print(batch.history['loss'])
                                history += np.array(np.log(batch.history['loss']))
                                # print(history)
                                y_hat = model.predict(transformed_x)
                                score = mse(y_hat, cur_y)
                                print(name_transformation, score)
                                # ax[idx, idy].plot(history.history['loss'], '-', alpha=0.7, linewidth=3, label=f'{score:0.3f} : {name_transformation}')
                            except:
                                print(name_transformation, 'NANS')

                        history = history/len(seed_list)
                        result_batch.append(history)
                        results.append(result_batch)

                        with open(f'results/full_experiment_1.pkl', 'wb') as file:
                            pickle.dump(results, file)


                        
                        # print(f'final: {history}')
                        # ax[idx, idy].plot(history, '-', alpha=0.7, linewidth=3, label=f'{score:0.3f} : {name_transformation}')

                    # ax[idx, idy].scatter(cur_x, cur_y, c='black', marker='x', label=f'Ground Truth', s=10)

            #         ax[idx, idy].set_title(f'{x_name}/{y_name}')
            #         ax[idx, idy].legend()
            # plt.suptitle('MLP', y=1.02)
            # plt.show()

intact 1.4786924829469097e-05
intact 1.3470663369651848e-05
intact 1.752248655047113e-05
intact 1.3180756140318547e-05
intact 1.4334318606830746e-05
standardization 6.3190545396959415e-06
standardization 5.523844289620749e-06
standardization 7.693104482748482e-06
standardization 5.564026554159158e-06
standardization 6.3316401596067485e-06
binary explosion 0.0388509165856777
binary explosion 0.03832493330174863
binary explosion 0.03898282640925902
binary explosion 0.03896836950604046
binary explosion 0.03884902751118807
binary explosion o/i 0.03807487389186598
binary explosion o/i 0.03783644659385064
binary explosion o/i 0.037929605585981754
binary explosion o/i 0.03792248378935638
binary explosion o/i 0.037863510387197395
higher dimensionality 0.000816507364440439
higher dimensionality 0.0011013723188639456
higher dimensionality 0.001504145217049147
higher dimensionality 0.0011801331116788139
higher dimensionality 0.0009585353441009273
intact 0.435319977743226
intact 0.4344491201764021

ValueError: Exception encountered when calling layer "float_base_encoder_26" "                 f"(type FloatBaseEncoder).

setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (1000, 1) + inhomogeneous part.

Call arguments received by layer "float_base_encoder_26" "                 f"(type FloatBaseEncoder):
  • inputs=tf.Tensor(shape=(1000, 1), dtype=float32)
  • args=<class 'inspect._empty'>
  • kwargs={'training': 'None'}

In [25]:
objects = []
with (open(f"results/full_experiment_1.pkl", "rb")) as openfile:
    while True:
        try:
            objects.append(pickle.load(openfile))
        except EOFError:
            break


In [26]:
objects

[[{'seed_list': [1, 2, 3, 4, 5],
   'dist_list': ['normal', 'lognormal', 'exponential'],
   'n_samples': 1000,
   'n_features': 1,
   'mean': 1,
   'std': 0.6,
   'epochs': 100,
   'only_integers': True,
   'encode_sign': True,
   'base': 11,
   'transformation': 'standardization',
   'datasets': 'x_normal/y_normal'},
  array([ -1.22326242,  -1.62811246,  -1.65422278,  -1.67518739,
          -1.69539539,  -1.71621408,  -1.74264887,  -1.76669857,
          -1.78888368,  -1.83110719,  -1.87675249,  -1.92277147,
          -1.97816626,  -2.03947684,  -2.1136824 ,  -2.19588406,
          -2.30021079,  -2.40249391,  -2.55536169,  -2.71454764,
          -2.90130971,  -3.12977505,  -3.38340631,  -3.6940539 ,
          -4.03937282,  -4.42978041,  -4.85773663,  -5.30469749,
          -5.88304942,  -6.43973213,  -7.1450316 ,  -7.81136838,
          -8.53298033,  -9.23578391,  -9.94200695, -10.58917529,
         -11.12841685, -11.59830745, -11.83523228, -11.96396886,
         -12.02900001, -12.045